VirtRegMap: Replace some identity copies with KILL instructions.

An identity COPY like this:
   %AL = COPY %AL, %EAX<imp-def>
has no semantic effect, but it encodes liveness information: further uses
of %EAX depend only on this instruction, even though it does not define
the full register.

Replace the COPY with a KILL instruction in those cases to maintain this
liveness information. (This reverts a small part of r238588, but this
time adds a comment explaining why a KILL instruction is useful.)

llvm-svn: 274952
Matthias Braun 2016-07-09 00:19:07 +00:00
parent 07985809ab
commit 152e7c8b12
80 changed files with 9755 additions and 163 deletions
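
In code terms the rule is small. The following sketch is illustration
only: the helper name keepsLivenessInfo is invented here, and its logic
simply mirrors the check in handleIdentityCopy() in the diff below.

#include <cassert>
#include "llvm/CodeGen/MachineInstr.h"

// True if an identity COPY still carries liveness information and must
// survive as a KILL; false if it can be erased outright.
// Examples (from the commit message):
//    %R0 = COPY %R0<undef>          -- undef source operand
//    %AL = COPY %AL, %EAX<imp-def>  -- extra implicit operand
static bool keepsLivenessInfo(const llvm::MachineInstr &MI) {
  assert(MI.isIdentityCopy() && "expected an identity COPY");
  return MI.getOperand(1).isUndef() || MI.getNumOperands() > 2;
}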

View File

@ -166,6 +166,7 @@ class VirtRegRewriter : public MachineFunctionPass {
  void addMBBLiveIns();
  bool readsUndefSubreg(const MachineOperand &MO) const;
  void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
  void handleIdentityCopy(MachineInstr &MI) const;

public:
  static char ID;
@ -346,6 +347,30 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
  return true;
}

void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
  if (!MI.isIdentityCopy())
    return;
  DEBUG(dbgs() << "Identity copy: " << MI);
  ++NumIdCopies;

  // Copies like:
  //    %R0 = COPY %R0<undef>
  //    %AL = COPY %AL, %EAX<imp-def>
  // give us additional liveness information: The target (super-)register
  // must not be valid before this point. Replace the COPY with a KILL
  // instruction to maintain this information.
  if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
    MI.setDesc(TII->get(TargetOpcode::KILL));
    DEBUG(dbgs() << "  replace by: " << MI);
    return;
  }

  if (Indexes)
    Indexes->removeMachineInstrFromMaps(MI);
  MI.eraseFromParent();
  DEBUG(dbgs() << "  deleted.\n");
}

void VirtRegRewriter::rewrite() {
  bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
  SmallVector<unsigned, 8> SuperDeads;
@ -435,15 +460,8 @@ void VirtRegRewriter::rewrite() {
      DEBUG(dbgs() << "> " << *MI);

      // Finally, remove any identity copies.
      if (MI->isIdentityCopy()) {
        ++NumIdCopies;
        DEBUG(dbgs() << "Deleting identity copy.\n");
        if (Indexes)
          Indexes->removeMachineInstrFromMaps(*MI);
        // It's safe to erase MI because MII has already been incremented.
        MI->eraseFromParent();
      }

      // We can remove identity copies right now.
      handleIdentityCopy(*MI);
    }
  }
}
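
The removed comment above ("It's safe to erase MI because MII has
already been incremented") is what makes the new call safe: the iterator
is advanced before each instruction is visited, so handleIdentityCopy()
may erase it without invalidating the loop. A simplified sketch of that
pattern, with rewriteBlock() and a free-standing handleIdentityCopy()
standing in for the real pass members:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

void handleIdentityCopy(llvm::MachineInstr &MI); // see the diff above

void rewriteBlock(llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
                                               MIE = MBB.instr_end();
       MII != MIE;) {
    llvm::MachineInstr *MI = &*MII;
    ++MII;                   // advance first: MI may be erased below
    // ... rewriting of virtual-register operands elided ...
    handleIdentityCopy(*MI); // erases MI or rewrites it into a KILL
  }
}

The many new "# kill" / "; kill" check lines in the test updates below
are the assembly printer's comment rendering of the KILL instructions
that now survive where the identity copies used to be deleted.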

View File

@ -613,6 +613,7 @@ define <1 x i8> @getL() {
; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE
; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF]
; CHECK-NEXT: ; kill
; Ultimately we should generate str b0, but right now, we match the vector
; variant which does not allow to fold the immediate into the store.
; CHECK-NEXT: st1.b { v0 }[0], {{\[}}[[LDRGOT_REG]]]

View File

@ -98,6 +98,7 @@ define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-PWR: # kill
; CHECK-NEXT: blr
%t0 = fadd <4 x float> %x0, %x1
@ -115,6 +116,7 @@ define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-PWR: # kill
; CHECK-NEXT: blr
%t0 = fadd <4 x float> %x0, %x1
@ -132,6 +134,7 @@ define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-PWR: # kill
; CHECK-NEXT: blr
%t0 = fadd <4 x float> %x0, %x1
@ -149,6 +152,7 @@ define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <
; CHECK-PWR: xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR: xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR: xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-PWR: # kill
; CHECK-NEXT: blr
%t0 = fadd <4 x float> %x0, %x1

View File

@ -60,9 +60,11 @@ define void @call_intarg(i32 %i0, i8* %i1) {
; HARD: mov %i5, %g2
; HARD-NEXT: ld [%fp+92], %g3
; HARD-NEXT: mov %i4, %i5
; HARD-NEXT: ! kill
; HARD-NEXT: std %g2, [%fp+-24]
; HARD-NEXT: mov %i3, %i4
; HARD-NEXT: std %i4, [%fp+-16]
; HARD-NEXT: ! kill
; HARD-NEXT: std %i0, [%fp+-8]
; HARD-NEXT: st %i2, [%fp+-28]
; HARD-NEXT: ld [%fp+104], %f0

View File

@ -8,6 +8,7 @@ define i32 @foo(i32 %p, i8 zeroext %x) nounwind {
; X32-LABEL: foo:
; X32: # BB#0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def>
; X32-NEXT: divb {{[0-9]+}}(%esp)
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: andl $1, %eax
@ -16,6 +17,7 @@ define i32 @foo(i32 %p, i8 zeroext %x) nounwind {
; X64-LABEL: foo:
; X64: # BB#0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def>
; X64-NEXT: divb %sil
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: andl $1, %eax
@ -33,6 +35,7 @@ define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: divw {{[0-9]+}}(%esp)
; X32-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; X32-NEXT: andl $1, %eax
; X32-NEXT: retl
;
@ -41,6 +44,7 @@ define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: divw %si
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<def>
; X64-NEXT: andl $1, %eax
; X64-NEXT: retq
%q = trunc i32 %p to i16

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
define i32 @test_add_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
@ -64,6 +64,7 @@ define i8 @test_add_1_setcc_slt(i64* %p) #0 {
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: lock xaddq %rax, (%rdi)
; CHECK-NEXT: shrq $63, %rax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %RAX<kill>
; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw add i64* %p, i64 1 seq_cst

View File

@ -9,6 +9,7 @@
define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castA:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX-NEXT: retq
@ -19,6 +20,7 @@ define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castB:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX-NEXT: retq
@ -31,12 +33,14 @@ define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
; AVX1-LABEL: castC:
; AVX1: ## BB#0:
; AVX1-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: castC:
; AVX2: ## BB#0:
; AVX2-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: retq
@ -50,6 +54,7 @@ define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
define <4 x float> @castD(<8 x float> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castD:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%shuffle.i = shufflevector <8 x float> %m, <8 x float> %m, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@ -59,6 +64,7 @@ define <4 x float> @castD(<8 x float> %m) nounwind uwtable readnone ssp {
define <2 x i64> @castE(<4 x i64> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castE:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%shuffle.i = shufflevector <4 x i64> %m, <4 x i64> %m, <2 x i32> <i32 0, i32 1>
@ -68,6 +74,7 @@ define <2 x i64> @castE(<4 x i64> %m) nounwind uwtable readnone ssp {
define <2 x double> @castF(<4 x double> %m) nounwind uwtable readnone ssp {
; AVX-LABEL: castF:
; AVX: ## BB#0:
; AVX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%shuffle.i = shufflevector <4 x double> %m, <4 x double> %m, <2 x i32> <i32 0, i32 1>

View File

@ -318,10 +318,12 @@ define <4 x i64> @test_mm256_castpd_si256(<4 x double> %a0) nounwind {
define <4 x double> @test_mm256_castpd128_pd256(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm256_castpd128_pd256:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_castpd128_pd256:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: retq
%res = shufflevector <2 x double> %a0, <2 x double> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
ret <4 x double> %res
@ -330,11 +332,13 @@ define <4 x double> @test_mm256_castpd128_pd256(<2 x double> %a0) nounwind {
define <2 x double> @test_mm256_castpd256_pd128(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm256_castpd256_pd128:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_castpd256_pd128:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = shufflevector <4 x double> %a0, <4 x double> %a0, <2 x i32> <i32 0, i32 1>
@ -368,10 +372,12 @@ define <4 x i64> @test_mm256_castps_si256(<8 x float> %a0) nounwind {
define <8 x float> @test_mm256_castps128_ps256(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm256_castps128_ps256:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_castps128_ps256:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: retq
%res = shufflevector <4 x float> %a0, <4 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x float> %res
@ -380,11 +386,13 @@ define <8 x float> @test_mm256_castps128_ps256(<4 x float> %a0) nounwind {
define <4 x float> @test_mm256_castps256_ps128(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm256_castps256_ps128:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_castps256_ps128:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = shufflevector <8 x float> %a0, <8 x float> %a0, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@ -394,10 +402,12 @@ define <4 x float> @test_mm256_castps256_ps128(<8 x float> %a0) nounwind {
define <4 x i64> @test_mm256_castsi128_si256(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_castsi128_si256:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_castsi128_si256:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
ret <4 x i64> %res
@ -430,11 +440,13 @@ define <8 x float> @test_mm256_castsi256_ps(<4 x i64> %a0) nounwind {
define <2 x i64> @test_mm256_castsi256_si128(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_castsi256_si128:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_castsi256_si128:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = shufflevector <4 x i64> %a0, <4 x i64> %a0, <2 x i32> <i32 0, i32 1>
@ -1032,11 +1044,13 @@ define <4 x i64> @test_mm256_insert_epi64(<4 x i64> %a0, i64 %a1) nounwind {
define <4 x double> @test_mm256_insertf128_pd(<4 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm256_insertf128_pd:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_insertf128_pd:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; X64-NEXT: retq
%ext = shufflevector <2 x double> %a1, <2 x double> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@ -1062,11 +1076,13 @@ define <8 x float> @test_mm256_insertf128_ps(<8 x float> %a0, <4 x float> %a1) n
define <4 x i64> @test_mm256_insertf128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_insertf128_si256:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_insertf128_si256:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; X64-NEXT: retq
%ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@ -2173,11 +2189,13 @@ define <4 x i64> @test_mm256_set_epi64x(i64 %a0, i64 %a1, i64 %a2, i64 %a3) noun
define <8 x float> @test_mm256_set_m128(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm256_set_m128:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_m128:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%res = shufflevector <4 x float> %a1, <4 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -2187,11 +2205,13 @@ define <8 x float> @test_mm256_set_m128(<4 x float> %a0, <4 x float> %a1) nounwi
define <4 x double> @test_mm256_set_m128d(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm256_set_m128d:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_m128d:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x double> %a0 to <4 x float>
@ -2204,11 +2224,13 @@ define <4 x double> @test_mm256_set_m128d(<2 x double> %a0, <2 x double> %a1) no
define <4 x i64> @test_mm256_set_m128i(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_set_m128i:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_set_m128i:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x float>
@ -2804,11 +2826,13 @@ define <4 x i64> @test_mm256_setr_epi64x(i64 %a0, i64 %a1, i64 %a2, i64 %a3) nou
define <8 x float> @test_mm256_setr_m128(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm256_setr_m128:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_m128:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <4 x float> %a0, <4 x float> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -2818,11 +2842,13 @@ define <8 x float> @test_mm256_setr_m128(<4 x float> %a0, <4 x float> %a1) nounw
define <4 x double> @test_mm256_setr_m128d(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm256_setr_m128d:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_m128d:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x double> %a0 to <4 x float>
@ -2835,11 +2861,13 @@ define <4 x double> @test_mm256_setr_m128d(<2 x double> %a0, <2 x double> %a1) n
define <4 x i64> @test_mm256_setr_m128i(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_setr_m128i:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_setr_m128i:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x float>

View File

@ -38,6 +38,7 @@ define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1
define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; CHECK-NEXT: retl
%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
@ -86,6 +87,7 @@ declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind read
define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
%res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)

View File

@ -6,6 +6,7 @@ define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; CHECK-NEXT: ## kill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%B = trunc <4 x i64> %A to <4 x i32>
@ -17,6 +18,7 @@ define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
; CHECK: ## BB#0:
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: ## kill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%B = trunc <8 x i32> %A to <8 x i16>

View File

@ -1708,11 +1708,13 @@ define <4 x float> @test_mm256_mask_i64gather_ps(<4 x float> %a0, float *%a1, <4
define <4 x i64> @test0_mm256_inserti128_si256(<4 x i64> %a0, <2 x i64> %a1) nounwind {
; X32-LABEL: test0_mm256_inserti128_si256:
; X32: # BB#0:
; X32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X32-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X32-NEXT: retl
;
; X64-LABEL: test0_mm256_inserti128_si256:
; X64: # BB#0:
; X64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
; X64-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-NEXT: retq
%ext = shufflevector <2 x i64> %a1, <2 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>

View File

@ -279,6 +279,7 @@ define <8 x i16> @shl_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shl = shl <8 x i16> %r, %a
@ -330,6 +331,7 @@ define <8 x i16> @ashr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%ashr = ashr <8 x i16> %r, %a
@ -394,6 +396,7 @@ define <8 x i16> @lshr_8i16(<8 x i16> %r, <8 x i16> %a) nounwind {
; CHECK-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%lshr = lshr <8 x i16> %r, %a

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
@ -675,6 +675,7 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
@ -689,6 +690,7 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
@ -696,6 +698,7 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1}
@ -735,6 +738,7 @@ define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F: ## BB#0:
; AVX512F-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512F-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512F-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512F-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
@ -749,6 +753,7 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512BW-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512BW-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512BW-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
@ -756,6 +761,7 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ: ## BB#0:
; AVX512DQ-NEXT: ## kill: %YMM3<def> %YMM3<kill> %ZMM3<def>
; AVX512DQ-NEXT: vpxor %ymm4, %ymm4, %ymm4
; AVX512DQ-NEXT: vpcmpneqd %zmm4, %zmm3, %k1
; AVX512DQ-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1}

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
@ -132,6 +132,7 @@ define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: callq _func8xi1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT: vpslld $31, %ymm0, %ymm0
@ -160,6 +161,7 @@ define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
; KNL_X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_X32-NEXT: calll _func8xi1
; KNL_X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL_X32-NEXT: vpslld $31, %ymm0, %ymm0
@ -277,6 +279,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: callq _func8xi1
; KNL-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
@ -312,6 +315,7 @@ define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
; KNL_X32-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_X32-NEXT: calll _func8xi1
; KNL_X32-NEXT: vpmovsxwq %xmm0, %zmm0
; KNL_X32-NEXT: vpsllvq LCPI7_0, %zmm0, %zmm0

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
@ -90,6 +90,7 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
;
; SKX-LABEL: sltof2f32:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0
; SKX-NEXT: retq
%b = sitofp <2 x i64> %a to <2 x float>
@ -287,7 +288,9 @@ define <16 x i32> @fptoui00(<16 x float> %a) nounwind {
define <8 x i32> @fptoui_256(<8 x float> %a) nounwind {
; KNL-LABEL: fptoui_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvttps2udq %zmm0, %zmm0
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: fptoui_256:
@ -301,7 +304,9 @@ define <8 x i32> @fptoui_256(<8 x float> %a) nounwind {
define <4 x i32> @fptoui_128(<4 x float> %a) nounwind {
; KNL-LABEL: fptoui_128:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvttps2udq %zmm0, %zmm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: fptoui_128:
@ -324,7 +329,9 @@ define <8 x i32> @fptoui01(<8 x double> %a) nounwind {
define <4 x i32> @fptoui_256d(<4 x double> %a) nounwind {
; KNL-LABEL: fptoui_256d:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvttpd2udq %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: fptoui_256d:
@ -583,7 +590,9 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
define <4 x double> @uitof64_256(<4 x i32> %a) nounwind {
; KNL-LABEL: uitof64_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: uitof64_256:
@ -606,7 +615,9 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind {
define <8 x float> @uitof32_256(<8 x i32> %a) nounwind {
; KNL-LABEL: uitof32_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: uitof32_256:
@ -620,7 +631,9 @@ define <8 x float> @uitof32_256(<8 x i32> %a) nounwind {
define <4 x float> @uitof32_128(<4 x i32> %a) nounwind {
; KNL-LABEL: uitof32_128:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: uitof32_128:

View File

@ -348,6 +348,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x8mem_to_8x32:
@ -371,6 +372,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re
; KNL-NEXT: vpmovsxbd (%rdi), %ymm0
; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x8mem_to_8x32:
@ -705,6 +707,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x16mem_to_8x32:
@ -728,6 +731,7 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw
; KNL-NEXT: vpmovsxwd (%rdi), %ymm0
; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8x16mem_to_8x32mask:
@ -761,6 +765,7 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8x16_to_8x32mask:
@ -1323,6 +1328,7 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_16i8_to_16i1:
@ -1330,6 +1336,7 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
%mask_b = trunc <16 x i8>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
@ -1342,6 +1349,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
; ALL-NEXT: vpslld $31, %zmm0, %zmm0
; ALL-NEXT: vptestmd %zmm0, %zmm0, %k0
; ALL-NEXT: kmovw %k0, %eax
; ALL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; ALL-NEXT: retq
%mask_b = trunc <16 x i32>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
@ -1379,6 +1387,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_8i16_to_8i1:
@ -1386,6 +1395,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
; SKX-NEXT: vpmovw2m %xmm0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
%mask_b = trunc <8 x i16>%a to <8 x i1>
%mask = bitcast <8 x i1> %mask_b to i8
@ -1395,6 +1405,8 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; KNL-LABEL: sext_8i1_8i32:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; KNL-NEXT: knotw %k0, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
@ -1423,6 +1435,7 @@ define i16 @trunc_i32_to_i1(i32 %a) {
; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: korw %k0, %k1, %k0
; ALL-NEXT: kmovw %k0, %eax
; ALL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; ALL-NEXT: retq
%a_i = trunc i32 %a to i1
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
@ -1435,6 +1448,7 @@ define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: sext_8i1_8i16:

View File

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
@ -14,6 +14,7 @@ define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
; SKX-LABEL: extract_subvector128_v32i16_first_element:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT: retq
%r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %r1
@ -31,6 +32,7 @@ define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
; SKX-LABEL: extract_subvector128_v64i8_first_element:
; SKX: ## BB#0:
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT: retq
%r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i8> %r1

View File

@ -1,11 +1,25 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL %s
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
;CHECK-LABEL: test1:
;CHECK: vinsertps
;CHECK: vinsertf32x4
;CHECK: ret
define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
; KNL-LABEL: test1:
; KNL: ## BB#0:
; KNL-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; KNL-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; KNL-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test1:
; SKX: ## BB#0:
; SKX-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; SKX-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT: vextractf32x4 $3, %zmm0, %xmm2
; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
; SKX-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%rrr = load float, float* %br
%rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
%rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
@ -15,19 +29,19 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
; KNL-LABEL: test2:
; KNL: ## BB#0:
; KNL-NEXT: vmovhpd (%rdi), %xmm0, %xmm2
; KNL-NEXT: vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; KNL-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm2
; KNL-NEXT: vmovsd %xmm1, %xmm2, %xmm1
; KNL-NEXT: vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; KNL-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test2:
; SKX: ## BB#0:
; SKX-NEXT: vmovhpd (%rdi), %xmm0, %xmm2
; SKX-NEXT: vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; SKX-NEXT: vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm2
; SKX-NEXT: vmovsd %xmm1, %xmm2, %xmm1
; SKX-NEXT: vmovsd {{.*#+}} xmm1 = xmm1[0],xmm2[1]
; SKX-NEXT: vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%rrr = load double, double* %br
@ -36,11 +50,20 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
ret <8 x double> %rrr3
}
;CHECK-LABEL: test3:
;CHECK: vextractf32x4 $1
;CHECK: vinsertf32x4 $0
;CHECK: ret
define <16 x float> @test3(<16 x float> %x) nounwind {
; KNL-LABEL: test3:
; KNL: ## BB#0:
; KNL-NEXT: vextractf32x4 $1, %zmm0, %xmm1
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test3:
; SKX: ## BB#0:
; SKX-NEXT: vextractf32x4 $1, %zmm0, %xmm1
; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%eee = extractelement <16 x float> %x, i32 4
%rrr2 = insertelement <16 x float> %x, float %eee, i32 1
ret <16 x float> %rrr2
@ -67,70 +90,140 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind {
ret <8 x i64> %rrr2
}
;CHECK-LABEL: test5:
;CHECK: vextractps
;CHECK: ret
define i32 @test5(<4 x float> %x) nounwind {
; KNL-LABEL: test5:
; KNL: ## BB#0:
; KNL-NEXT: vextractps $3, %xmm0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test5:
; SKX: ## BB#0:
; SKX-NEXT: vextractps $3, %xmm0, %eax
; SKX-NEXT: retq
%ef = extractelement <4 x float> %x, i32 3
%ei = bitcast float %ef to i32
ret i32 %ei
}
;CHECK-LABEL: test6:
;CHECK: vextractps {{.*}}, (%rdi)
;CHECK: ret
define void @test6(<4 x float> %x, float* %out) nounwind {
; KNL-LABEL: test6:
; KNL: ## BB#0:
; KNL-NEXT: vextractps $3, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: test6:
; SKX: ## BB#0:
; SKX-NEXT: vextractps $3, %xmm0, (%rdi)
; SKX-NEXT: retq
%ef = extractelement <4 x float> %x, i32 3
store float %ef, float* %out, align 4
ret void
}
;CHECK-LABEL: test7
;CHECK: vmovd
;CHECK: vpermps %zmm
;CHECK: ret
define float @test7(<16 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test7:
; KNL: ## BB#0:
; KNL-NEXT: vmovd %edi, %xmm1
; KNL-NEXT: vpermps %zmm0, %zmm1, %zmm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test7:
; SKX: ## BB#0:
; SKX-NEXT: vmovd %edi, %xmm1
; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT: retq
%e = extractelement <16 x float> %x, i32 %ind
ret float %e
}
;CHECK-LABEL: test8
;CHECK: vmovq
;CHECK: vpermpd %zmm
;CHECK: ret
define double @test8(<8 x double> %x, i32 %ind) nounwind {
; KNL-LABEL: test8:
; KNL: ## BB#0:
; KNL-NEXT: movslq %edi, %rax
; KNL-NEXT: vmovq %rax, %xmm1
; KNL-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test8:
; SKX: ## BB#0:
; SKX-NEXT: movslq %edi, %rax
; SKX-NEXT: vmovq %rax, %xmm1
; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; SKX-NEXT: retq
%e = extractelement <8 x double> %x, i32 %ind
ret double %e
}
;CHECK-LABEL: test9
;CHECK: vmovd
;CHECK: vpermps %ymm
;CHECK: ret
define float @test9(<8 x float> %x, i32 %ind) nounwind {
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: vmovd %edi, %xmm1
; KNL-NEXT: vpermps %ymm0, %ymm1, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: vmovd %edi, %xmm1
; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0
; SKX-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; SKX-NEXT: retq
%e = extractelement <8 x float> %x, i32 %ind
ret float %e
}
;CHECK-LABEL: test10
;CHECK: vmovd
;CHECK: vpermd %zmm
;CHECK: vmovd %xmm0, %eax
;CHECK: ret
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; KNL-LABEL: test10:
; KNL: ## BB#0:
; KNL-NEXT: vmovd %edi, %xmm1
; KNL-NEXT: vpermd %zmm0, %zmm1, %zmm0
; KNL-NEXT: vmovd %xmm0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: test10:
; SKX: ## BB#0:
; SKX-NEXT: vmovd %edi, %xmm1
; SKX-NEXT: vpermd %zmm0, %zmm1, %zmm0
; SKX-NEXT: vmovd %xmm0, %eax
; SKX-NEXT: retq
%e = extractelement <16 x i32> %x, i32 %ind
ret i32 %e
}
;CHECK-LABEL: test11
;CHECK: vpcmpltud
;CHECK: kshiftlw $11
;CHECK: kshiftrw $15
;CHECK: testb
;CHECK: je
;CHECK: ret
;CHECK: ret
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test11:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: je LBB10_2
; KNL-NEXT: ## BB#1: ## %A
; KNL-NEXT: vmovaps %zmm1, %zmm0
; KNL-NEXT: retq
; KNL-NEXT: LBB10_2: ## %B
; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test11:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $11, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je LBB10_2
; SKX-NEXT: ## BB#1: ## %A
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
; SKX-NEXT: LBB10_2: ## %B
; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT: retq
%cmp_res = icmp ult <16 x i32> %a, %b
%ia = extractelement <16 x i1> %cmp_res, i32 4
br i1 %ia, label %A, label %B
@ -141,70 +234,144 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
ret <16 x i32>%c
}
;CHECK-LABEL: test12
;CHECK: vpcmpgtq
;CHECK: kshiftlw $15
;CHECK: kshiftrw $15
;CHECK: testb
;CHECK: ret
define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
; KNL-LABEL: test12:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtq %zmm0, %zmm2, %k0
; KNL-NEXT: vpcmpgtq %zmm1, %zmm3, %k1
; KNL-NEXT: kunpckbw %k0, %k1, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: retq
;
; SKX-LABEL: test12:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtq %zmm0, %zmm2, %k0
; SKX-NEXT: vpcmpgtq %zmm1, %zmm3, %k1
; SKX-NEXT: kunpckbw %k0, %k1, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: cmoveq %rsi, %rdi
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: retq
%cmpvector_func.i = icmp slt <16 x i64> %a, %b
%extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
%res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
ret i64 %res
}
;CHECK-LABEL: test13
;CHECK: cmpl %esi, %edi
;CHECK: setb %al
;CHECK: kmovw %eax, %k0
;CHECK: movw $-4
;CHECK: korw
define i16 @test13(i32 %a, i32 %b) {
; KNL-LABEL: test13:
; KNL: ## BB#0:
; KNL-NEXT: cmpl %esi, %edi
; KNL-NEXT: setb %al
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: movw $-4, %ax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test13:
; SKX: ## BB#0:
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: kmovw %eax, %k0
; SKX-NEXT: movw $-4, %ax
; SKX-NEXT: kmovw %eax, %k1
; SKX-NEXT: korw %k0, %k1, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
%cmp_res = icmp ult i32 %a, %b
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
%res = bitcast <16 x i1> %maskv to i16
ret i16 %res
}
;CHECK-LABEL: test14
;CHECK: vpcmpgtq
;KNL: kshiftlw $11
;KNL: kshiftrw $15
;KNL: testb
;SKX: kshiftlb $3
;SKX: kshiftrb $7
;SKX: testb
;CHECK: ret
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; KNL-LABEL: test14:
; KNL: ## BB#0:
; KNL-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb %al, %al
; KNL-NEXT: cmoveq %rsi, %rdi
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: retq
;
; SKX-LABEL: test14:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT: kshiftlb $3, %k0, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: cmoveq %rsi, %rdi
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: retq
%cmpvector_func.i = icmp slt <8 x i64> %a, %b
%extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
%res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
ret i64 %res
}
;CHECK-LABEL: test15
;CHECK: movb (%rdi), %al
;CHECK: movw $-1, %ax
;CHECK: cmovew
define i16 @test15(i1 *%addr) {
; KNL-LABEL: test15:
; KNL: ## BB#0:
; KNL-NEXT: movb (%rdi), %al
; KNL-NEXT: xorl %ecx, %ecx
; KNL-NEXT: testb %al, %al
; KNL-NEXT: movw $-1, %ax
; KNL-NEXT: cmovew %cx, %ax
; KNL-NEXT: retq
;
; SKX-LABEL: test15:
; SKX: ## BB#0:
; SKX-NEXT: movb (%rdi), %al
; SKX-NEXT: xorl %ecx, %ecx
; SKX-NEXT: testb %al, %al
; SKX-NEXT: movw $-1, %ax
; SKX-NEXT: cmovew %cx, %ax
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 1
%x1 = insertelement <16 x i1> undef, i1 %x, i32 10
%x2 = bitcast <16 x i1>%x1 to i16
ret i16 %x2
}
;CHECK-LABEL: test16
;CHECK: movzbl (%rdi), %eax
;CHECK: kmovw
;CHECK: kshiftlw $10
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
; KNL-LABEL: test16:
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## BB#0:
; SKX-NEXT: movzbl (%rdi), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: kmovw %esi, %k1
; SKX-NEXT: kshiftlw $10, %k0, %k0
; SKX-NEXT: korw %k0, %k1, %k0
; SKX-NEXT: kmovw %k0, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 128
%a1 = bitcast i16 %a to <16 x i1>
%x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
@ -212,15 +379,30 @@ define i16 @test16(i1 *%addr, i16 %a) {
ret i16 %x2
}
;CHECK-LABEL: test17
;KNL: movzbl (%rdi), %eax
;KNL: andl $1, %eax
;KNL: kshiftlw $4
;KNL: korw
;SKX: kshiftlb $4
;SKX: korb
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {
; KNL-LABEL: test17:
; KNL: ## BB#0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kshiftlw $4, %k0, %k0
; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## BB#0:
; SKX-NEXT: movzbl (%rdi), %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: kmovb %esi, %k1
; SKX-NEXT: kshiftlb $4, %k0, %k0
; SKX-NEXT: korb %k0, %k1, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
%x = load i1 , i1 * %addr, align 128
%a1 = bitcast i8 %a to <8 x i1>
%x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
@ -229,6 +411,13 @@ define i8 @test17(i1 *%addr, i8 %a) {
}
define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v8i64:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
; KNL-NEXT: vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v8i64:
; SKX: ## BB#0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax
@ -242,6 +431,13 @@ define i64 @extract_v8i64(<8 x i64> %x, i64* %dst) {
}
define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v4i64:
; KNL: ## BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v4i64:
; SKX: ## BB#0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax
@ -255,6 +451,12 @@ define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
}
define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
; KNL-LABEL: extract_v2i64:
; KNL: ## BB#0:
; KNL-NEXT: vmovq %xmm0, %rax
; KNL-NEXT: vpextrq $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v2i64:
; SKX: ## BB#0:
; SKX-NEXT: vmovq %xmm0, %rax
@ -267,6 +469,13 @@ define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
}
define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v16i32:
; KNL: ## BB#0:
; KNL-NEXT: vpextrd $1, %xmm0, %eax
; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
; KNL-NEXT: vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v16i32:
; SKX: ## BB#0:
; SKX-NEXT: vpextrd $1, %xmm0, %eax
@ -280,6 +489,13 @@ define i32 @extract_v16i32(<16 x i32> %x, i32* %dst) {
}
define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v8i32:
; KNL: ## BB#0:
; KNL-NEXT: vpextrd $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrd $1, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v8i32:
; SKX: ## BB#0:
; SKX-NEXT: vpextrd $1, %xmm0, %eax
@ -293,6 +509,12 @@ define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
}
define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
; KNL-LABEL: extract_v4i32:
; KNL: ## BB#0:
; KNL-NEXT: vpextrd $1, %xmm0, %eax
; KNL-NEXT: vpextrd $3, %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v4i32:
; SKX: ## BB#0:
; SKX-NEXT: vpextrd $1, %xmm0, %eax
@ -305,11 +527,20 @@ define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
}
define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v32i16:
; KNL: ## BB#0:
; KNL-NEXT: vpextrw $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v32i16:
; SKX: ## BB#0:
; SKX-NEXT: vpextrw $1, %xmm0, %eax
; SKX-NEXT: vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
%r1 = extractelement <32 x i16> %x, i32 1
%r2 = extractelement <32 x i16> %x, i32 9
@ -318,11 +549,20 @@ define i16 @extract_v32i16(<32 x i16> %x, i16* %dst) {
}
define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v16i16:
; KNL: ## BB#0:
; KNL-NEXT: vpextrw $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrw $1, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v16i16:
; SKX: ## BB#0:
; SKX-NEXT: vpextrw $1, %xmm0, %eax
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
%r1 = extractelement <16 x i16> %x, i32 1
%r2 = extractelement <16 x i16> %x, i32 9
@ -331,10 +571,18 @@ define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
}
define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
; KNL-LABEL: extract_v8i16:
; KNL: ## BB#0:
; KNL-NEXT: vpextrw $1, %xmm0, %eax
; KNL-NEXT: vpextrw $3, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v8i16:
; SKX: ## BB#0:
; SKX-NEXT: vpextrw $1, %xmm0, %eax
; SKX-NEXT: vpextrw $3, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: retq
%r1 = extractelement <8 x i16> %x, i32 1
%r2 = extractelement <8 x i16> %x, i32 3
@ -343,11 +591,20 @@ define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
}
define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v64i8:
; KNL: ## BB#0:
; KNL-NEXT: vpextrb $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v64i8:
; SKX: ## BB#0:
; SKX-NEXT: vpextrb $1, %xmm0, %eax
; SKX-NEXT: vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
%r1 = extractelement <64 x i8> %x, i32 1
%r2 = extractelement <64 x i8> %x, i32 17
@ -356,11 +613,20 @@ define i8 @extract_v64i8(<64 x i8> %x, i8* %dst) {
}
define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v32i8:
; KNL: ## BB#0:
; KNL-NEXT: vpextrb $1, %xmm0, %eax
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT: vpextrb $1, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v32i8:
; SKX: ## BB#0:
; SKX-NEXT: vpextrb $1, %xmm0, %eax
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
%r1 = extractelement <32 x i8> %x, i32 1
%r2 = extractelement <32 x i8> %x, i32 17
@ -369,10 +635,18 @@ define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
}
define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
; KNL-LABEL: extract_v16i8:
; KNL: ## BB#0:
; KNL-NEXT: vpextrb $1, %xmm0, %eax
; KNL-NEXT: vpextrb $3, %xmm0, (%rdi)
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: extract_v16i8:
; SKX: ## BB#0:
; SKX-NEXT: vpextrb $1, %xmm0, %eax
; SKX-NEXT: vpextrb $3, %xmm0, (%rdi)
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
%r1 = extractelement <16 x i8> %x, i32 1
%r2 = extractelement <16 x i8> %x, i32 3
@ -381,6 +655,15 @@ define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
}
define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v8i64:
; KNL: ## BB#0:
; KNL-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm1
; KNL-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
; KNL-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_v8i64:
; SKX: ## BB#0:
; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
@ -396,6 +679,15 @@ define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
}
define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v4i64:
; KNL: ## BB#0:
; KNL-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_v4i64:
; SKX: ## BB#0:
; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
@ -411,6 +703,12 @@ define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
}
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
; KNL-LABEL: insert_v2i64:
; KNL: ## BB#0:
; KNL-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm0
; KNL-NEXT: vpinsrq $3, %rdi, %xmm0, %xmm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_v2i64:
; SKX: ## BB#0:
; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm0
@ -423,6 +721,15 @@ define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , i64* %ptr) {
}
define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, i32* %ptr) {
; KNL-LABEL: insert_v16i32:
; KNL: ## BB#0:
; KNL-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm1
; KNL-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
; KNL-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_v16i32:
; SKX: ## BB#0:
; SKX-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
@ -570,6 +877,15 @@ define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, i8* %ptr) {
}
define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
; KNL-LABEL: insert_v32i8:
; KNL: ## BB#0:
; KNL-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpinsrb $1, %edi, %xmm1, %xmm1
; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT: retq
;
; SKX-LABEL: insert_v32i8:
; SKX: ## BB#0:
; SKX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
@ -643,7 +959,7 @@ define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
;
; SKX-LABEL: test_insert_128_v8f64:
; SKX: ## BB#0:
; SKX-NEXT: vunpcklpd %xmm1, %xmm0, %xmm1
; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; SKX-NEXT: vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%r = insertelement <8 x double> %x, double %y, i32 1
@ -653,13 +969,13 @@ define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
; KNL-LABEL: test_insert_128_v16f32:
; KNL: ## BB#0:
; KNL-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm1
; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: test_insert_128_v16f32:
; SKX: ## BB#0:
; SKX-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm1
; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT: retq
%r = insertelement <16 x float> %x, float %y, i32 1


@ -504,6 +504,7 @@ define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
@ -515,6 +516,7 @@ define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
@ -527,6 +529,7 @@ define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
@ -538,6 +541,7 @@ define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res
@ -550,6 +554,7 @@ define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
ret i16 %res
@ -561,6 +566,7 @@ define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
ret i16 %res
@ -573,6 +579,7 @@ define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
ret i8 %res
@ -584,6 +591,7 @@ define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
ret i8 %res


@ -67,6 +67,7 @@ define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: kunpckbw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
ret i16 %res
@ -734,6 +735,7 @@ define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
ret i16 %res
@ -745,6 +747,7 @@ define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpneqpd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
ret i8 %res
@ -817,6 +820,7 @@ define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: addb %cl, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
%res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
@ -834,6 +838,7 @@ define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
%res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
@ -4897,6 +4902,7 @@ define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT: retq
%res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
@ -4917,6 +4923,7 @@ define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1
; CHECK-NEXT: kandw %k2, %k1, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT: retq
%res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
@ -4939,6 +4946,7 @@ define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT: retq
%res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
@ -4960,6 +4968,7 @@ define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1,
; CHECK-NEXT: kmovw %k0, %edx
; CHECK-NEXT: andb %cl, %al
; CHECK-NEXT: andb %dl, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT: retq
%res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
%res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
@ -5548,6 +5557,7 @@ declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
@ -5569,6 +5579,7 @@ declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x
define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
@ -5590,6 +5601,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32
define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
@ -5611,6 +5623,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>,
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3]
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3]
@ -6340,6 +6353,7 @@ define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16
; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16-1)
@ -6358,6 +6372,7 @@ define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2
; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: addb %cl, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1)


@ -8,6 +8,7 @@ define i16 @mask16(i16 %x) {
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%m0 = bitcast i16 %x to <16 x i1>
%m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@ -35,6 +36,7 @@ define i8 @mask8(i8 %x) {
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: knotw %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: mask8:
@ -42,6 +44,7 @@ define i8 @mask8(i8 %x) {
; SKX-NEXT: kmovb %edi, %k0
; SKX-NEXT: knotb %k0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@ -136,6 +139,7 @@ define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; CHECK-NEXT: kxorw %k1, %k0, %k0
; CHECK-NEXT: korw %k0, %k2, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%ma = load <16 x i1>, <16 x i1>* %x
%mb = load <16 x i1>, <16 x i1>* %y
@ -152,6 +156,7 @@ define i8 @shuf_test1(i16 %v) nounwind {
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: shuf_test1:
@ -159,6 +164,7 @@ define i8 @shuf_test1(i16 %v) nounwind {
; SKX-NEXT: kmovw %edi, %k0
; SKX-NEXT: kshiftrw $8, %k0, %k0
; SKX-NEXT: kmovb %k0, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: retq
%v1 = bitcast i16 %v to <16 x i1>
%mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@ -201,6 +207,7 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; CHECK-NEXT: kshiftlw $10, %k0, %k0
; CHECK-NEXT: kshiftrw $15, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT: retq
%cmp_res = icmp ugt <16 x i32> %a, %b
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
@ -674,6 +681,7 @@ define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
; KNL-LABEL: test22:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; KNL-NEXT: vpslld $31, %ymm0, %ymm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
@ -693,6 +701,7 @@ define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; KNL-LABEL: test23:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, %eax
@ -1395,6 +1404,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: load_2i1:
@ -1414,6 +1424,7 @@ define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: load_4i1:


@ -90,6 +90,7 @@ define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%mask = load <8 x i1> , <8 x i1>* %m
%a = load <8 x i1> , <8 x i1>* %a.0
@ -120,6 +121,7 @@ define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%mask = load <8 x i1> , <8 x i1>* %m
%a = load <8 x i1> , <8 x i1>* %a.0
@ -138,6 +140,7 @@ define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
; CHECK-NEXT: kandw %k0, %k2, %k0
; CHECK-NEXT: korw %k0, %k1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%mask = bitcast i8 %m to <8 x i1>
%a = bitcast i8 %a.0 to <8 x i1>


@ -53,7 +53,9 @@ define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
; KNL-LABEL: trunc_qb_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_qb_256:
@ -67,6 +69,7 @@ define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
; KNL-LABEL: trunc_qb_256_mem:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; KNL-NEXT: vmovd %xmm0, (%rdi)
@ -128,7 +131,9 @@ define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
; KNL-LABEL: trunc_qw_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_qw_256:
@ -142,6 +147,7 @@ define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
; KNL-LABEL: trunc_qw_256_mem:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; KNL-NEXT: vmovq %xmm0, (%rdi)
@ -203,7 +209,9 @@ define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
; KNL-LABEL: trunc_qd_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_qd_256:
@ -217,6 +225,7 @@ define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
; KNL-LABEL: trunc_qd_256_mem:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovqd %zmm0, %ymm0
; KNL-NEXT: vmovaps %xmm0, (%rdi)
; KNL-NEXT: retq
@ -276,7 +285,9 @@ define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
; KNL-LABEL: trunc_db_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_db_256:
@ -290,6 +301,7 @@ define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
; KNL-LABEL: trunc_db_256_mem:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; KNL-NEXT: vmovq %xmm0, (%rdi)
@ -350,7 +362,9 @@ define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
; KNL-LABEL: trunc_dw_256:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_dw_256:
@ -364,6 +378,7 @@ define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
; KNL-LABEL: trunc_dw_256_mem:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: vmovaps %xmm0, (%rdi)
; KNL-NEXT: retq


@ -124,6 +124,7 @@ define <8 x double> @_inreg8xdouble(double %a) {
define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
; ALL-LABEL: _sd8xdouble_mask:
; ALL: # BB#0:
; ALL-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
; ALL-NEXT: vpxor %ymm3, %ymm3, %ymm3
; ALL-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; ALL-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
@ -139,6 +140,7 @@ define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %m
define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
; ALL-LABEL: _sd8xdouble_maskz:
; ALL: # BB#0:
; ALL-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; ALL-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
@ -164,6 +166,7 @@ define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
; ALL-LABEL: _sd8xdouble_mask_load:
; ALL: # BB#0:
; ALL-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; ALL-NEXT: vpxor %ymm2, %ymm2, %ymm2
; ALL-NEXT: vpcmpneqd %zmm2, %zmm1, %k1
; ALL-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
@ -179,6 +182,7 @@ define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8
define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
; ALL-LABEL: _sd8xdouble_maskz_load:
; ALL: # BB#0:
; ALL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; ALL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; ALL-NEXT: vpcmpneqd %zmm1, %zmm0, %k1
; ALL-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}


@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
@ -111,8 +111,11 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-LABEL: test9:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test9:
@ -128,8 +131,11 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-LABEL: test10:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test10:
@ -160,6 +166,7 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
; CHECK-NEXT: vpcmpeqq %zmm3, %zmm1, %k1
; CHECK-NEXT: kunpckbw %k0, %k1, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = icmp eq <16 x i64> %a, %b
%res1 = bitcast <16 x i1> %res to i16
@ -167,6 +174,155 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
}
define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
; KNL-LABEL: test12_v32i32:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $32, %rsp
; KNL-NEXT: vpcmpeqd %zmm3, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: movl (%rsp), %eax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test12_v32i32:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
@ -180,6 +336,308 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind {
}
define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind {
; KNL-LABEL: test12_v64i16:
; KNL: ## BB#0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: movq %rsp, %rbp
; KNL-NEXT: andq $-32, %rsp
; KNL-NEXT: subq $64, %rsp
; KNL-NEXT: vpcmpeqw %ymm5, %ymm1, %ymm1
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm1
; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; KNL-NEXT: vpmovsxbd %xmm1, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm4, %ymm0, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, (%rsp)
; KNL-NEXT: vpcmpeqw %ymm7, %ymm3, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $13, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $12, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $11, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $10, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $9, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $8, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $7, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $6, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $5, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $4, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $3, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $1, %k0, %k1
; KNL-NEXT: kshiftrw $15, %k1, %k1
; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; KNL-NEXT: kshiftlw $0, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; KNL-NEXT: movl (%rsp), %ecx
; KNL-NEXT: movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT: shlq $32, %rax
; KNL-NEXT: orq %rcx, %rax
; KNL-NEXT: movq %rbp, %rsp
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test12_v64i16:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqw %zmm2, %zmm0, %k0
@ -538,9 +996,12 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) no
define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) nounwind {
; KNL-LABEL: test35:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vmovups (%rdi), %ymm2
; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test35:
@ -649,9 +1110,12 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n
define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) nounwind {
; KNL-LABEL: test41:
; KNL: ## BB#0:
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vbroadcastss (%rdi), %ymm2
; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test41:


@ -105,6 +105,7 @@ define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x
; CHECK-NEXT: kshiftlq $48, %k0, %k0
; CHECK-NEXT: kshiftrq $48, %k0, %k1
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.masked.load.v16i8(<16 x i8>* %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
ret <16 x i8> %res
@ -119,6 +120,7 @@ define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x
; CHECK-NEXT: kshiftlq $32, %k0, %k0
; CHECK-NEXT: kshiftrq $32, %k0, %k1
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; CHECK-NEXT: retq
%res = call <32 x i8> @llvm.masked.load.v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
ret <32 x i8> %res
@ -133,6 +135,7 @@ define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i1
; CHECK-NEXT: kshiftld $24, %k0, %k0
; CHECK-NEXT: kshiftrd $24, %k0, %k1
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.masked.load.v8i16(<8 x i16>* %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
ret <8 x i16> %res
@ -147,6 +150,7 @@ define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16
; CHECK-NEXT: kshiftld $16, %k0, %k0
; CHECK-NEXT: kshiftrd $16, %k0, %k1
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; CHECK-NEXT: retq
%res = call <16 x i16> @llvm.masked.load.v16i16(<16 x i16>* %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
ret <16 x i16> %res
@ -156,6 +160,7 @@ declare <16 x i16> @llvm.masked.load.v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_store_16xi8:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
; CHECK-NEXT: kshiftlq $48, %k0, %k0
@ -170,6 +175,7 @@ declare void @llvm.masked.store.v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_store_32xi8:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
; CHECK-NEXT: kshiftlq $32, %k0, %k0
@ -184,6 +190,7 @@ declare void @llvm.masked.store.v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_store_8xi16:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; CHECK-NEXT: vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT: vpmovw2m %zmm0, %k0
; CHECK-NEXT: kshiftld $24, %k0, %k0
@ -198,6 +205,7 @@ declare void @llvm.masked.store.v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)
define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_store_16xi16:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; CHECK-NEXT: vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT: vpmovb2m %zmm0, %k0
; CHECK-NEXT: kshiftld $16, %k0, %k0


@ -415,6 +415,7 @@ define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
@ -426,6 +427,7 @@ define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res
@ -461,6 +463,7 @@ define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
ret i16 %res
@ -472,6 +475,7 @@ define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
ret i16 %res


@ -402,6 +402,7 @@ define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
ret i16 %res
@ -413,6 +414,7 @@ define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
ret i16 %res
@ -425,6 +427,7 @@ define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
ret i8 %res
@ -436,6 +439,7 @@ define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
ret i8 %res
@ -448,6 +452,7 @@ define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
ret i16 %res
@ -459,6 +464,7 @@ define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
ret i16 %res
@ -471,6 +477,7 @@ define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
ret i8 %res
@ -482,6 +489,7 @@ define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
ret i8 %res
@ -5219,6 +5227,7 @@ define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
ret i16 %res
@ -5243,6 +5252,7 @@ define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
ret i8 %res
@ -5255,6 +5265,7 @@ define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
ret i16 %res
@ -5847,6 +5858,7 @@ define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x
; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
%res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
@ -5883,6 +5895,7 @@ define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
@ -5901,6 +5914,7 @@ define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16
; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
%res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
@ -5919,6 +5933,7 @@ define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %
; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
%res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
@ -5955,6 +5970,7 @@ define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2
; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
@ -5973,6 +5989,7 @@ define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16
; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
%res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)


@ -457,6 +457,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0, i8 %x1) {
; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0
; CHECK-NEXT: kmovb %k0, %eax
; CHECK-NEXT: addb %cl, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.512(<8 x double> %x0, i32 4, i8 -1)
@ -474,6 +475,7 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) {
; CHECK-NEXT: vfpclassps $4, %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: addl %ecx, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 %x1)
%res1 = call i16 @llvm.x86.avx512.mask.fpclass.ps.512(<16 x float> %x0, i32 4, i16 -1)
@ -503,6 +505,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
; CHECK-NEXT: movb $-1, %cl
; CHECK-NEXT: LBB28_4:
; CHECK-NEXT: addb %cl, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.sd(<2 x double> %x0, i32 4, i8 -1)
@ -532,6 +535,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
; CHECK-NEXT: movb $-1, %cl
; CHECK-NEXT: LBB29_4:
; CHECK-NEXT: addb %cl, %al
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %AX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ss(<4 x float> %x0, i32 4, i8 -1)
@ -586,6 +590,7 @@ define i16@test_int_x86_avx512_cvtd2mask_512(<16 x i32> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovd2m %zmm0, %k0
; CHECK-NEXT: kmovw %k0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i16 @llvm.x86.avx512.cvtd2mask.512(<16 x i32> %x0)
ret i16 %res
@ -598,6 +603,7 @@ define i8@test_int_x86_avx512_cvtq2mask_512(<8 x i64> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovq2m %zmm0, %k0
; CHECK-NEXT: kmovb %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%res = call i8 @llvm.x86.avx512.cvtq2mask.512(<8 x i64> %x0)
ret i8 %res
@ -632,6 +638,7 @@ declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x8.512(<8 x float>, <16 x
define <16 x float>@test_int_x86_avx512_mask_broadcastf32x8_512(<8 x float> %x0, <16 x float> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
@ -653,6 +660,7 @@ declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x2.512(<2 x double>, <8 x
define <8 x double>@test_int_x86_avx512_mask_broadcastf64x2_512(<2 x double> %x0, <8 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]
@ -674,6 +682,7 @@ declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x8.512(<8 x i32>, <16 x i32
define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x8_512(<8 x i32> %x0, <16 x i32> %x2, i16 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
@ -695,6 +704,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x2.512(<2 x i64>, <8 x i64>,
define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x2_512(<2 x i64> %x0, <8 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; CHECK-NEXT: kmovb %edi, %k1
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} {z} = zmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,0,1,0,1,0,1]


@ -7,6 +7,7 @@ define i8 @mask8(i8 %x) {
; CHECK-NEXT: kmovb %edi, %k0
; CHECK-NEXT: knotb %k0, %k0
; CHECK-NEXT: kmovb %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@ -56,6 +57,7 @@ define i8 @mand8_mem(<8 x i1>* %x, <8 x i1>* %y) {
; CHECK-NEXT: kxorb %k1, %k0, %k0
; CHECK-NEXT: korb %k0, %k2, %k0
; CHECK-NEXT: kmovb %k0, %eax
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%ma = load <8 x i1>, <8 x i1>* %x
%mb = load <8 x i1>, <8 x i1>* %y


@ -2143,6 +2143,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0, i8 %x1) {
; CHECK-NEXT: vfpclassps $4, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc0,0x04]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.128(<4 x float> %x0, i32 4, i8 -1)
@ -2161,6 +2162,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0, i8 %x1) {
; CHECK-NEXT: vfpclassps $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc0,0x04]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.ps.256(<8 x float> %x0, i32 4, i8 -1)
@ -2179,6 +2181,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0, i8 %x1) {
; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc0,0x02]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 4, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.128(<2 x double> %x0, i32 2, i8 -1)
@ -2197,6 +2200,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0, i8 %x1) {
; CHECK-NEXT: vfpclasspd $4, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc0,0x04]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 2, i8 %x1)
%res1 = call i8 @llvm.x86.avx512.mask.fpclass.pd.256(<4 x double> %x0, i32 4, i8 -1)
@ -2274,6 +2278,7 @@ define i8@test_int_x86_avx512_cvtd2mask_128(<4 x i32> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovd2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x39,0xc0]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtd2mask.128(<4 x i32> %x0)
ret i8 %res
@ -2286,6 +2291,7 @@ define i8@test_int_x86_avx512_cvtd2mask_256(<8 x i32> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovd2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x39,0xc0]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtd2mask.256(<8 x i32> %x0)
ret i8 %res
@ -2298,6 +2304,7 @@ define i8@test_int_x86_avx512_cvtq2mask_128(<2 x i64> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovq2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x39,0xc0]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtq2mask.128(<2 x i64> %x0)
ret i8 %res
@ -2310,6 +2317,7 @@ define i8@test_int_x86_avx512_cvtq2mask_256(<4 x i64> %x0) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpmovq2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x39,0xc0]
; CHECK-NEXT: kmovb %k0, %eax ## encoding: [0xc5,0xf9,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.cvtq2mask.256(<4 x i64> %x0)
ret i8 %res
@ -2367,6 +2375,7 @@ declare <4 x double> @llvm.x86.avx512.mask.broadcastf64x2.256(<2 x double>, <4 x
define <4 x double>@test_int_x86_avx512_mask_broadcastf64x2_256(<2 x double> %x0, <4 x double> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vshuff64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x23,0xd0,0x00]
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]
@ -2391,6 +2400,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.broadcasti64x2.256(<2 x i64>, <4 x i64>,
define <4 x i64>@test_int_x86_avx512_mask_broadcasti64x2_256(<2 x i64> %x0, <4 x i64> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti64x2_256:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf]
; CHECK-NEXT: vshufi64x2 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x43,0xd0,0x00]
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,0,1]


@ -980,6 +980,7 @@ define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
@ -991,6 +992,7 @@ define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
@ -1005,6 +1007,7 @@ define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
@ -1018,6 +1021,7 @@ define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
@ -1030,6 +1034,7 @@ define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
ret i8 %res
@ -1041,6 +1046,7 @@ define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
ret i8 %res
@ -1055,6 +1061,7 @@ define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
ret i8 %res
@ -1068,6 +1075,7 @@ define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
ret i8 %res
@ -1082,6 +1090,7 @@ define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
@ -1095,6 +1104,7 @@ define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
@ -1111,6 +1121,7 @@ define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
ret i8 %res
@ -1126,6 +1137,7 @@ define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
ret i8 %res
@ -1140,6 +1152,7 @@ define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
ret i8 %res
@ -1153,6 +1166,7 @@ define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
ret i8 %res
@ -1169,6 +1183,7 @@ define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
ret i8 %res
@ -1184,6 +1199,7 @@ define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-NEXT: kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT: kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
ret i8 %res


@ -2900,6 +2900,7 @@ define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpleps %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
ret i8 %res
@ -2911,6 +2912,7 @@ define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpleps %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
ret i8 %res
@ -2922,6 +2924,7 @@ define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplepd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
ret i8 %res
@ -2933,6 +2936,7 @@ define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplepd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
ret i8 %res
@ -6341,6 +6345,7 @@ declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x f
define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vshuff32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x23,0xd0,0x00]
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3]
@ -6364,6 +6369,7 @@ declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>,
define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT: vshufi32x4 $0, %ymm0, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x43,0xd0,0x00]
; CHECK-NEXT: ## ymm2 {%k1} {z} = ymm0[0,1,2,3,0,1,2,3]
@ -8107,6 +8113,7 @@ define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
@ -8125,6 +8132,7 @@ define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
@ -8143,6 +8151,7 @@ define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
@ -8161,6 +8170,7 @@ define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
@ -8179,6 +8189,7 @@ define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2
; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
@ -8197,6 +8208,7 @@ define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2
; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
@ -8215,6 +8227,7 @@ define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2
; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
@ -8233,6 +8246,7 @@ define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2
; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
%res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)


@ -131,6 +131,8 @@ define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; CHECK-NEXT: shrl $15, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: # kill: %DX<def> %DX<kill> %EDX<kill>
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: retl


@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s
declare i8 @llvm.cttz.i8(i8, i1)
@ -12,6 +12,7 @@ define i8 @t1(i8 %x) {
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: orl $256, %eax # imm = 0x100
; CHECK-NEXT: tzcntl %eax, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
ret i8 %tmp
@ -59,6 +60,7 @@ define i8 @t5(i8 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: tzcntl %eax, %eax
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
%tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
ret i8 %tmp
@ -455,6 +457,7 @@ entry:
define i64 @bzhi64b(i64 %x, i8 zeroext %index) {
; CHECK-LABEL: bzhi64b:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: bzhiq %rsi, %rdi, %rax
; CHECK-NEXT: retq
entry:


@ -14,6 +14,7 @@ define i8 @cttz_i8(i8 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: bsfl %eax, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
ret i8 %tmp
@ -52,6 +53,7 @@ define i8 @ctlz_i8(i8 %x) {
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: bsrl %eax, %eax
; CHECK-NEXT: xorl $7, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
ret i8 %tmp2
@ -62,6 +64,7 @@ define i16 @ctlz_i16(i16 %x) {
; CHECK: # BB#0:
; CHECK-NEXT: bsrw %di, %ax
; CHECK-NEXT: xorl $15, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
ret i16 %tmp2
@ -100,6 +103,7 @@ define i8 @ctlz_i8_zero_test(i8 %n) {
; CHECK-NEXT: bsrl %eax, %eax
; CHECK-NEXT: xorl $7, %eax
; CHECK-NEXT: .LBB8_2: # %cond.end
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%tmp1 = call i8 @llvm.ctlz.i8(i8 %n, i1 false)
ret i8 %tmp1
@ -117,6 +121,7 @@ define i16 @ctlz_i16_zero_test(i16 %n) {
; CHECK-NEXT: bsrw %di, %ax
; CHECK-NEXT: xorl $15, %eax
; CHECK-NEXT: .LBB9_2: # %cond.end
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%tmp1 = call i16 @llvm.ctlz.i16(i16 %n, i1 false)
ret i16 %tmp1
@ -168,6 +173,7 @@ define i8 @cttz_i8_zero_test(i8 %n) {
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: bsfl %eax, %eax
; CHECK-NEXT: .LBB12_2: # %cond.end
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)
ret i8 %tmp1


@ -18,11 +18,13 @@ define i8 @extractelement_v16i8_1(<16 x i8> %a) nounwind {
; SSE41-LABEL: extractelement_v16i8_1:
; SSE41: # BB#0:
; SSE41-NEXT: pextrb $1, %xmm0, %eax
; SSE41-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE41-NEXT: retq
;
; AVX-LABEL: extractelement_v16i8_1:
; AVX: # BB#0:
; AVX-NEXT: vpextrb $1, %xmm0, %eax
; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX-NEXT: retq
%b = extractelement <16 x i8> %a, i256 1
ret i8 %b
@ -38,11 +40,13 @@ define i8 @extractelement_v16i8_11(<16 x i8> %a) nounwind {
; SSE41-LABEL: extractelement_v16i8_11:
; SSE41: # BB#0:
; SSE41-NEXT: pextrb $11, %xmm0, %eax
; SSE41-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE41-NEXT: retq
;
; AVX-LABEL: extractelement_v16i8_11:
; AVX: # BB#0:
; AVX-NEXT: vpextrb $11, %xmm0, %eax
; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX-NEXT: retq
%b = extractelement <16 x i8> %a, i256 11
ret i8 %b
@ -58,11 +62,13 @@ define i8 @extractelement_v16i8_14(<16 x i8> %a) nounwind {
; SSE41-LABEL: extractelement_v16i8_14:
; SSE41: # BB#0:
; SSE41-NEXT: pextrb $14, %xmm0, %eax
; SSE41-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE41-NEXT: retq
;
; AVX-LABEL: extractelement_v16i8_14:
; AVX: # BB#0:
; AVX-NEXT: vpextrb $14, %xmm0, %eax
; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX-NEXT: retq
%b = extractelement <16 x i8> %a, i256 14
ret i8 %b
@ -78,11 +84,13 @@ define i8 @extractelement_v32i8_1(<32 x i8> %a) nounwind {
; SSE41-LABEL: extractelement_v32i8_1:
; SSE41: # BB#0:
; SSE41-NEXT: pextrb $1, %xmm0, %eax
; SSE41-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE41-NEXT: retq
;
; AVX-LABEL: extractelement_v32i8_1:
; AVX: # BB#0:
; AVX-NEXT: vpextrb $1, %xmm0, %eax
; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%b = extractelement <32 x i8> %a, i256 1
@ -99,12 +107,14 @@ define i8 @extractelement_v32i8_17(<32 x i8> %a) nounwind {
; SSE41-LABEL: extractelement_v32i8_17:
; SSE41: # BB#0:
; SSE41-NEXT: pextrb $1, %xmm1, %eax
; SSE41-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE41-NEXT: retq
;
; AVX1-LABEL: extractelement_v32i8_17:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpextrb $1, %xmm0, %eax
; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -112,6 +122,7 @@ define i8 @extractelement_v32i8_17(<32 x i8> %a) nounwind {
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%b = extractelement <32 x i8> %a, i256 17
@ -122,11 +133,13 @@ define i16 @extractelement_v8i16_0(<8 x i16> %a, i256 %i) nounwind {
; SSE-LABEL: extractelement_v8i16_0:
; SSE: # BB#0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: extractelement_v8i16_0:
; AVX: # BB#0:
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT: retq
%b = extractelement <8 x i16> %a, i256 0
ret i16 %b
@ -136,11 +149,13 @@ define i16 @extractelement_v8i16_3(<8 x i16> %a, i256 %i) nounwind {
; SSE-LABEL: extractelement_v8i16_3:
; SSE: # BB#0:
; SSE-NEXT: pextrw $3, %xmm0, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: extractelement_v8i16_3:
; AVX: # BB#0:
; AVX-NEXT: vpextrw $3, %xmm0, %eax
; AVX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT: retq
%b = extractelement <8 x i16> %a, i256 3
ret i16 %b
@ -150,11 +165,13 @@ define i16 @extractelement_v16i16_0(<16 x i16> %a, i256 %i) nounwind {
; SSE-LABEL: extractelement_v16i16_0:
; SSE: # BB#0:
; SSE-NEXT: movd %xmm0, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: extractelement_v16i16_0:
; AVX: # BB#0:
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%b = extractelement <16 x i16> %a, i256 0
@ -165,12 +182,14 @@ define i16 @extractelement_v16i16_13(<16 x i16> %a, i256 %i) nounwind {
; SSE-LABEL: extractelement_v16i16_13:
; SSE: # BB#0:
; SSE-NEXT: pextrw $5, %xmm1, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-LABEL: extractelement_v16i16_13:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpextrw $5, %xmm0, %eax
; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -178,6 +197,7 @@ define i16 @extractelement_v16i16_13(<16 x i16> %a, i256 %i) nounwind {
; AVX2: # BB#0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpextrw $5, %xmm0, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%b = extractelement <16 x i16> %a, i256 13


@ -43,6 +43,7 @@ define i16 @test_cvtss_sh(float %a0) nounwind {
; X32-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32-NEXT: vcvtps2ph $0, %xmm0, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X32-NEXT: retl
;
; X64-LABEL: test_cvtss_sh:
@ -51,6 +52,7 @@ define i16 @test_cvtss_sh(float %a0) nounwind {
; X64-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-NEXT: vcvtps2ph $0, %xmm0, %xmm0
; X64-NEXT: vmovd %xmm0, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%ins0 = insertelement <4 x float> undef, float %a0, i32 0
%ins1 = insertelement <4 x float> %ins0, float 0.000000e+00, i32 1


@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=1 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWON64 %s
; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=0 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWOFF64 %s
; RUN: llc -verify-machineinstrs -fixup-byte-word-insts=1 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWON32 %s
@ -54,6 +54,7 @@ define i8 @test_movb_hreg(i16 %a0) {
; X64-NEXT: movl %edi, %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: addb %dil, %al
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
;
; X32-LABEL: test_movb_hreg:


@ -17,13 +17,17 @@ entry:
; X64-LABEL: foo
; X64: callq
; X64-NEXT: # kill
; X64-NEXT: shrl $8, %eax
; X64-NEXT: # kill
; X64-NEXT: popq
; X64-NEXT: retq
; X32-LABEL: foo
; X32: callq
; X32-NEXT: # kill
; X32-NEXT: shrl $8, %eax
; X32-NEXT: # kill
; X32-NEXT: popq
; X32-NEXT: retq
}


@ -10,9 +10,12 @@
define i16 @reassociate_muls_i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3) {
; CHECK-LABEL: reassociate_muls_i16:
; CHECK: # BB#0:
; CHECK-NEXT: # kill
; CHECK-NEXT: # kill
; CHECK-NEXT: leal (%rdi,%rsi), %eax
; CHECK-NEXT: imull %ecx, %edx
; CHECK-NEXT: imull %edx, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%t0 = add i16 %x0, %x1
%t1 = mul i16 %x2, %t0
@ -23,6 +26,8 @@ define i16 @reassociate_muls_i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3) {
define i32 @reassociate_muls_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_muls_i32:
; CHECK: # BB#0:
; CHECK-NEXT: # kill
; CHECK-NEXT: # kill
; CHECK-NEXT: leal (%rdi,%rsi), %eax
; CHECK-NEXT: imull %ecx, %edx
; CHECK-NEXT: imull %edx, %eax


@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_32
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=SKX
@ -676,23 +675,25 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
;
; KNL_64-LABEL: test15:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_64: vpxor %ymm2, %ymm2, %ymm2
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
; KNL_64-NEXT: # kill
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test15:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_32: vpxor %ymm2, %ymm2, %ymm2
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
; KNL_32-NEXT: # kill
; KNL_32-NEXT: retl
;
; SKX-LABEL: test15:
@ -723,7 +724,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
;
; KNL_64-LABEL: test16:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
@ -737,7 +738,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
;
; KNL_32-LABEL: test16:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
@ -777,7 +778,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
;
; KNL_64-LABEL: test17:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
@ -787,7 +788,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
;
; KNL_32-LABEL: test17:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllvq .LCPI16_0, %zmm1, %zmm1
@ -829,7 +830,7 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
;
; KNL_64-LABEL: test18:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
@ -838,7 +839,7 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
;
; KNL_32-LABEL: test18:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
@ -867,7 +868,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
;
; KNL_64-LABEL: test19:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
@ -879,7 +880,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
;
; KNL_32-LABEL: test19:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
@ -914,7 +915,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; KNL_64-LABEL: test20:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_64: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_64-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
@ -925,7 +926,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; KNL_32-LABEL: test20:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_32: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_32-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
@ -938,7 +939,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; SKX-LABEL: test20:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
@ -963,7 +964,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; KNL_64-LABEL: test21:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
@ -973,7 +974,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; KNL_32-LABEL: test21:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpsllvq .LCPI20_0, %zmm2, %zmm2
@ -983,7 +984,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; SKX-LABEL: test21:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
@ -993,7 +994,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; SKX_32-LABEL: test21:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX_32-NEXT: kshiftlb $6, %k0, %k0
; SKX_32-NEXT: kshiftrb $6, %k0, %k1
@ -1012,7 +1013,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
;
; KNL_64-LABEL: test22:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_64: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_64-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
@ -1026,7 +1027,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
;
; KNL_32-LABEL: test22:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
@ -1074,7 +1075,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
;
; KNL_64-LABEL: test23:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
@ -1084,7 +1085,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
;
; KNL_32-LABEL: test23:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllvq .LCPI22_0, %zmm1, %zmm1
@ -1118,7 +1119,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64: # BB#0:
; KNL_64-NEXT: movb $3, %al
; KNL_64: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovaps %zmm1, %zmm0
@ -1126,7 +1127,7 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
;
; KNL_32-LABEL: test24:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxord %zmm1, %zmm1, %zmm1
; KNL_32-NEXT: vinserti32x4 $0, .LCPI23_0, %zmm1, %zmm1
; KNL_32-NEXT: vpsllvq .LCPI23_1, %zmm1, %zmm1
@ -1159,7 +1160,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
;
; KNL_64-LABEL: test25:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
@ -1169,7 +1170,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
;
; KNL_32-LABEL: test25:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpsllvq .LCPI24_0, %zmm1, %zmm1
@ -1204,7 +1205,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
;
; KNL_64-LABEL: test26:
; KNL_64: # BB#0:
; KNL_64-NEXT: movb $3, %al
; KNL_64: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovaps %zmm1, %zmm0
@ -1212,7 +1213,7 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
;
; KNL_32-LABEL: test26:
; KNL_32: # BB#0:
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL_32-NEXT: vinserti32x4 $0, .LCPI25_0, %zmm2, %zmm2
; KNL_32-NEXT: vpsllvq .LCPI25_1, %zmm2, %zmm2
@ -1251,6 +1252,7 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
; KNL_64-NEXT: # kill
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test27:
@ -1261,6 +1263,7 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_32-NEXT: movb $3, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
; KNL_32-NEXT: # kill
; KNL_32-NEXT: retl
;
; SKX-LABEL: test27:
@ -1282,7 +1285,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; KNL_64-LABEL: test28:
; KNL_64: # BB#0:
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
@ -1290,7 +1293,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; KNL_32-LABEL: test28:
; KNL_32: # BB#0:
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL_32-NEXT: vinserti32x4 $0, .LCPI27_0, %zmm2, %zmm2
; KNL_32-NEXT: vpsllvq .LCPI27_1, %zmm2, %zmm2
@ -1300,7 +1303,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; SKX-LABEL: test28:
; SKX: # BB#0:
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
@ -1308,7 +1311,7 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; SKX_32-LABEL: test28:
; SKX_32: # BB#0:
; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: movb $3, %al
; SKX_32-NEXT: kmovb %eax, %k1
; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}

File diff suppressed because it is too large.


@ -125,6 +125,7 @@ entry:
; CHECK32-LABEL: one16:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: # kill
; CHECK32-NEXT: retl
}
@ -135,6 +136,7 @@ entry:
; CHECK32-LABEL: minus_one16:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: # kill
; CHECK32-NEXT: retl
}


@ -102,6 +102,7 @@ define void @float_call_signbit(double %n) {
; CHECK: ## BB#0: ## %entry
; CHECK-NEXT: movd %xmm0, %rdi
; CHECK-NEXT: shrq $63, %rdi
; CHECK-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<kill>
; CHECK-NEXT: jmp _float_call_signbit_callee ## TAILCALL
entry:
%t0 = bitcast double %n to i64


@ -9,6 +9,8 @@
define i32 @or_shift1_and1(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift1_and1:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi,2), %eax
; CHECK-NEXT: retq
@ -22,6 +24,8 @@ define i32 @or_shift1_and1(i32 %x, i32 %y) {
define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift1_and1_swapped:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi,2), %eax
; CHECK-NEXT: retq
@ -35,6 +39,8 @@ define i32 @or_shift1_and1_swapped(i32 %x, i32 %y) {
define i32 @or_shift2_and1(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift2_and1:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi,4), %eax
; CHECK-NEXT: retq
@ -48,6 +54,8 @@ define i32 @or_shift2_and1(i32 %x, i32 %y) {
define i32 @or_shift3_and1(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift3_and1:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi,8), %eax
; CHECK-NEXT: retq
@ -61,6 +69,8 @@ define i32 @or_shift3_and1(i32 %x, i32 %y) {
define i32 @or_shift3_and7(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift3_and7:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: andl $7, %esi
; CHECK-NEXT: leal (%rsi,%rdi,8), %eax
; CHECK-NEXT: retq
@ -76,6 +86,8 @@ define i32 @or_shift3_and7(i32 %x, i32 %y) {
define i32 @or_shift4_and1(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift4_and1:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: shll $4, %edi
; CHECK-NEXT: andl $1, %esi
; CHECK-NEXT: leal (%rsi,%rdi), %eax
@ -92,6 +104,7 @@ define i32 @or_shift4_and1(i32 %x, i32 %y) {
define i32 @or_shift3_and8(i32 %x, i32 %y) {
; CHECK-LABEL: or_shift3_and8:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; CHECK-NEXT: leal (,%rdi,8), %eax
; CHECK-NEXT: andl $8, %esi
; CHECK-NEXT: orl %esi, %eax


@ -66,6 +66,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind {
; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
entry:
%A = mul <16 x i8> %i, < i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117, i8 117 >
@ -206,6 +207,7 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind {
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
entry:
%A = mul <16 x i8> %i, %j


@ -8,6 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
define i64 @foo64(i1 zeroext %i, i32 %j) #0 {
; CHECK-LABEL: foo64:
; CHECK: # BB#0:
; CHECK-NEXT: # kill
; CHECK-NEXT: orq $-2, %rdi
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq


@ -5,6 +5,7 @@ entry:
; CHECK-LABEL: foo:
; CHECK: movzwl 4(%esp), %eax
; CHECK-NEXT: xorl $21998, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retl
%0 = xor i16 %x, 21998
ret i16 %0
@ -15,6 +16,7 @@ entry:
; CHECK-LABEL: bar:
; CHECK: movzwl 4(%esp), %eax
; CHECK-NEXT: xorl $54766, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retl
%0 = xor i16 %x, 54766
ret i16 %0


@ -28,6 +28,7 @@ define i32 @test__blcfill_u32(i32 %a0) {
;
; X64-LABEL: test__blcfill_u32:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
@ -47,6 +48,7 @@ define i32 @test__blci_u32(i32 %a0) {
;
; X64-LABEL: test__blci_u32:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: xorl $-1, %eax
; X64-NEXT: orl %edi, %eax
@ -91,6 +93,7 @@ define i32 @test__blcmsk_u32(i32 %a0) {
;
; X64-LABEL: test__blcmsk_u32:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: xorl %edi, %eax
; X64-NEXT: retq
@ -109,6 +112,7 @@ define i32 @test__blcs_u32(i32 %a0) {
;
; X64-LABEL: test__blcs_u32:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 1(%rdi), %eax
; X64-NEXT: orl %edi, %eax
; X64-NEXT: retq


@ -9,6 +9,7 @@ define i8 @foo(i8 %tmp325) {
; CHECK-NEXT: andl $28672, %eax # imm = 0x7000
; CHECK-NEXT: shrl $12, %eax
; CHECK-NEXT: movb $37, %dl
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: mulb %dl
; CHECK-NEXT: subb %al, %cl
; CHECK-NEXT: movl %ecx, %eax


@ -57,8 +57,10 @@ define i8 @and_pow_2(i8 %x, i8 %y) {
; CHECK: # BB#0:
; CHECK-NEXT: andb $4, %sil
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: # kill: %EAX<def> %EAX<kill> %AX<def>
; CHECK-NEXT: divb %sil
; CHECK-NEXT: movzbl %ah, %eax # NOREX
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: retq
;
%and = and i8 %y, 4
@ -66,7 +68,7 @@ define i8 @and_pow_2(i8 %x, i8 %y) {
ret i8 %urem
}
; A vector splat constant divisor should get the same treatment as a scalar.
define <4 x i32> @vec_const_pow_2(<4 x i32> %x) {
; CHECK-LABEL: vec_const_pow_2:


@ -81,6 +81,7 @@ define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
;
; AVX-LABEL: fptosi_4f64_to_2i32:
; AVX: # BB#0:
; AVX-NEXT: # kill
; AVX-NEXT: vcvttpd2dqy %ymm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq


@ -17,6 +17,7 @@ define void @t1(i32 %a, x86_mmx* %P) nounwind {
;
; X64-LABEL: t1:
; X64: # BB#0:
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: shll $12, %edi
; X64-NEXT: movd %rdi, %xmm0
; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]


@ -16,6 +16,7 @@ define x86_mmx @t0(i32 %A) nounwind {
;
; X64-LABEL: t0:
; X64: ## BB#0:
; X64-NEXT: ## kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: movd %rdi, %xmm0
; X64-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]


@ -61,6 +61,7 @@ define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
; AVX-LABEL: sitofp_4i32_to_2f64:
; AVX: # BB#0:
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX-NEXT: # kill
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
%cvt = sitofp <4 x i32> %a to <4 x double>
@ -98,6 +99,7 @@ define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -105,6 +107,7 @@ define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = sitofp <8 x i16> %a to <8 x double>
@ -144,6 +147,7 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -152,6 +156,7 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = sitofp <16 x i8> %a to <16 x double>
@ -432,6 +437,7 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -445,6 +451,7 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) {
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: vaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <4 x i32> %a to <4 x double>
@ -482,6 +489,7 @@ define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -489,6 +497,7 @@ define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <8 x i16> %a to <8 x double>
@ -528,6 +537,7 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
; AVX1: # BB#0:
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -536,6 +546,7 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <16 x i8> %a to <16 x double>
@ -890,6 +901,7 @@ define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -897,6 +909,7 @@ define <4 x float> @sitofp_8i16_to_4f32(<8 x i16> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = sitofp <8 x i16> %a to <8 x float>
@ -939,6 +952,7 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -947,6 +961,7 @@ define <4 x float> @sitofp_16i8_to_4f32(<16 x i8> %a) {
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = sitofp <16 x i8> %a to <16 x float>
@ -1384,6 +1399,7 @@ define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -1391,6 +1407,7 @@ define <4 x float> @uitofp_8i16_to_4f32(<8 x i16> %a) {
; AVX2: # BB#0:
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <8 x i16> %a to <8 x float>
@ -1433,6 +1450,7 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX1-NEXT: # kill
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@ -1441,6 +1459,7 @@ define <4 x float> @uitofp_16i8_to_4f32(<16 x i8> %a) {
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: # kill
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
%cvt = uitofp <16 x i8> %a to <16 x float>


@ -13,18 +13,20 @@ define i16 @test1(float %f) nounwind {
; CHECK-NEXT: minss LCPI0_2, %xmm0
; CHECK-NEXT: maxss %xmm1, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retl
;
%tmp = insertelement <4 x float> undef, float %f, i32 0 ; <<4 x float>> [#uses=1]
%tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
%tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
%tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
%tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer ) ; <<4 x float>> [#uses=1]
%tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]
%tmp69 = trunc i32 %tmp.upgrd.1 to i16 ; <i16> [#uses=1]
ret i16 %tmp69
}
define i16 @test2(float %f) nounwind {
@ -37,15 +39,17 @@ define i16 @test2(float %f) nounwind {
; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: maxss %xmm1, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retl
;
%tmp28 = fsub float %f, 1.000000e+00 ; <float> [#uses=1]
%tmp37 = fmul float %tmp28, 5.000000e-01 ; <float> [#uses=1]
%tmp375 = insertelement <4 x float> undef, float %tmp37, i32 0 ; <<4 x float>> [#uses=1]
%tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp375, <4 x float> < float 6.553500e+04, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> < float 0.000000e+00, float undef, float undef, float undef > ) ; <<4 x float>> [#uses=1]
%tmp = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 ) ; <i32> [#uses=1]
%tmp69 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
ret i16 %tmp69
}
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
@ -68,6 +72,7 @@ define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: roundss $4, (%eax), %xmm0
; CHECK-NEXT: retl
;
%a = load float , float *%b
%B = insertelement <4 x float> undef, float %a, i32 0
%X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)
@ -86,6 +91,7 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
; CHECK-NEXT: roundss $4, %xmm1, %xmm0
; CHECK-NEXT: addl $28, %esp
; CHECK-NEXT: retl
;
%a = load float , float *%b
%B = insertelement <4 x float> undef, float %a, i32 0
%q = call <4 x float> @f()
@ -101,6 +107,7 @@ define <2 x double> @test5() nounwind uwtable readnone noinline {
; CHECK-NEXT: movl $128, %eax
; CHECK-NEXT: cvtsi2sdl %eax, %xmm0
; CHECK-NEXT: retl
;
entry:
%0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double
4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone

View File

@ -65,7 +65,9 @@ define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
;
; AVX512F-LABEL: test_uitofp_v4i32_to_v4f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_uitofp_v4i32_to_v4f32:
@ -142,7 +144,9 @@ define <8 x float> @test_uitofp_v8i32_to_v8f32(<8 x i32> %arg) {
;
; AVX512F-LABEL: test_uitofp_v8i32_to_v8f32:
; AVX512F: # BB#0:
; AVX512F-NEXT: # kill
; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
; AVX512F-NEXT: # kill
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_uitofp_v8i32_to_v8f32:

View File

@ -80,6 +80,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
; XOP-NEXT: vmovd %edi, %xmm0
; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
; XOP-NEXT: vpextrb $0, %xmm0, %eax
; XOP-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; XOP-NEXT: retq
%b = call i8 @llvm.bitreverse.i8(i8 %a)
ret i8 %b
@ -88,6 +89,7 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
define i16 @test_bitreverse_i16(i16 %a) nounwind {
; SSE-LABEL: test_bitreverse_i16:
; SSE: # BB#0:
; SSE-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE-NEXT: movl %edi, %ecx
; SSE-NEXT: andl $32768, %ecx # imm = 0x8000
; SSE-NEXT: movl %edi, %eax
@ -148,10 +150,12 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; SSE-NEXT: shrl $15, %ecx
; SSE-NEXT: orl %edi, %ecx
; SSE-NEXT: orl %ecx, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_bitreverse_i16:
; AVX: # BB#0:
; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX-NEXT: movl %edi, %ecx
; AVX-NEXT: andl $32768, %ecx # imm = 0x8000
; AVX-NEXT: movl %edi, %eax
@ -212,6 +216,7 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; AVX-NEXT: shrl $15, %ecx
; AVX-NEXT: orl %edi, %ecx
; AVX-NEXT: orl %ecx, %eax
; AVX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT: retq
;
; XOP-LABEL: test_bitreverse_i16:
@ -219,6 +224,7 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
; XOP-NEXT: vmovd %edi, %xmm0
; XOP-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
; XOP-NEXT: vmovd %xmm0, %eax
; XOP-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; XOP-NEXT: retq
%b = call i16 @llvm.bitreverse.i16(i16 %a)
ret i16 %b
@ -227,6 +233,7 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind {
define i32 @test_bitreverse_i32(i32 %a) nounwind {
; SSE-LABEL: test_bitreverse_i32:
; SSE: # BB#0:
; SSE-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE-NEXT: movl %edi, %eax
; SSE-NEXT: shll $31, %eax
; SSE-NEXT: movl %edi, %ecx
@ -353,6 +360,7 @@ define i32 @test_bitreverse_i32(i32 %a) nounwind {
;
; AVX-LABEL: test_bitreverse_i32:
; AVX: # BB#0:
; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX-NEXT: movl %edi, %eax
; AVX-NEXT: shll $31, %eax
; AVX-NEXT: movl %edi, %ecx

View File

@ -148,6 +148,7 @@ define <4 x i1> @test_cmp_v4f64(<4 x double> %a0, <4 x double> %a1) nounwind {
; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -155,6 +156,7 @@ define <4 x i1> @test_cmp_v4f64(<4 x double> %a0, <4 x double> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = fcmp ogt <4 x double> %a0, %a1
ret <4 x i1> %1
@ -200,6 +202,7 @@ define <8 x i1> @test_cmp_v8f32(<8 x float> %a0, <8 x float> %a1) nounwind {
; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -207,6 +210,7 @@ define <8 x i1> @test_cmp_v8f32(<8 x float> %a0, <8 x float> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = fcmp ogt <8 x float> %a0, %a1
ret <8 x i1> %1
@ -267,6 +271,7 @@ define <4 x i1> @test_cmp_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -274,6 +279,7 @@ define <4 x i1> @test_cmp_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = icmp sgt <4 x i64> %a0, %a1
ret <4 x i1> %1
@ -319,6 +325,7 @@ define <8 x i1> @test_cmp_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -326,6 +333,7 @@ define <8 x i1> @test_cmp_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = icmp sgt <8 x i32> %a0, %a1
ret <8 x i1> %1
@ -691,6 +699,7 @@ define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -874,6 +883,7 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;

View File

@ -27,6 +27,7 @@ define <4 x float> @cvt_4i16_to_4f32(<4 x i16> %a0) {
; ALL-NEXT: movq %rax, %rcx
; ALL-NEXT: movq %rax, %rdx
; ALL-NEXT: movswl %ax, %esi
; ALL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; ALL-NEXT: shrl $16, %eax
; ALL-NEXT: shrq $32, %rcx
; ALL-NEXT: shrq $48, %rdx
@ -57,6 +58,7 @@ define <4 x float> @cvt_8i16_to_4f32(<8 x i16> %a0) {
; ALL-NEXT: movq %rax, %rcx
; ALL-NEXT: movq %rax, %rdx
; ALL-NEXT: movswl %ax, %esi
; ALL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; ALL-NEXT: shrl $16, %eax
; ALL-NEXT: shrq $32, %rcx
; ALL-NEXT: shrq $48, %rdx
@ -88,6 +90,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) {
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movq %rdx, %r10
; AVX1-NEXT: movswl %dx, %r9d
; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill>
; AVX1-NEXT: shrl $16, %edx
; AVX1-NEXT: shrq $32, %r8
; AVX1-NEXT: shrq $48, %r10
@ -95,6 +98,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) {
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: movq %rdi, %rsi
; AVX1-NEXT: movswl %di, %ecx
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX1-NEXT: shrl $16, %edi
; AVX1-NEXT: shrq $32, %rax
; AVX1-NEXT: shrq $48, %rsi
@ -135,6 +139,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) {
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movq %rdx, %r10
; AVX2-NEXT: movswl %dx, %r9d
; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill>
; AVX2-NEXT: shrl $16, %edx
; AVX2-NEXT: shrq $32, %r8
; AVX2-NEXT: shrq $48, %r10
@ -142,6 +147,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) {
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq %rdi, %rsi
; AVX2-NEXT: movswl %di, %ecx
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX2-NEXT: shrl $16, %edi
; AVX2-NEXT: shrq $32, %rax
; AVX2-NEXT: shrq $48, %rsi
@ -182,6 +188,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) {
; AVX512-NEXT: movq %rdx, %r8
; AVX512-NEXT: movq %rdx, %r10
; AVX512-NEXT: movswl %dx, %r9d
; AVX512-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill>
; AVX512-NEXT: shrl $16, %edx
; AVX512-NEXT: shrq $32, %r8
; AVX512-NEXT: shrq $48, %r10
@ -189,6 +196,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) {
; AVX512-NEXT: movq %rdi, %rax
; AVX512-NEXT: movq %rdi, %rsi
; AVX512-NEXT: movswl %di, %ecx
; AVX512-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill>
; AVX512-NEXT: shrl $16, %edi
; AVX512-NEXT: shrq $32, %rax
; AVX512-NEXT: shrq $48, %rsi
@ -241,6 +249,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX1-NEXT: movswl %cx, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm9
; AVX1-NEXT: movswl %ax, %ecx
; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: cwtl
; AVX1-NEXT: vmovd %eax, %xmm10
@ -255,6 +264,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX1-NEXT: movswl %cx, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm13
; AVX1-NEXT: movswl %ax, %ecx
; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: cwtl
; AVX1-NEXT: vmovd %eax, %xmm14
@ -269,6 +279,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX1-NEXT: movswl %cx, %ecx
; AVX1-NEXT: vmovd %ecx, %xmm3
; AVX1-NEXT: movswl %ax, %ecx
; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: cwtl
; AVX1-NEXT: vmovd %eax, %xmm4
@ -333,6 +344,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX2-NEXT: movswl %cx, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm9
; AVX2-NEXT: movswl %ax, %ecx
; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: cwtl
; AVX2-NEXT: vmovd %eax, %xmm10
@ -347,6 +359,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX2-NEXT: movswl %cx, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm13
; AVX2-NEXT: movswl %ax, %ecx
; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: cwtl
; AVX2-NEXT: vmovd %eax, %xmm14
@ -361,6 +374,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX2-NEXT: movswl %cx, %ecx
; AVX2-NEXT: vmovd %ecx, %xmm3
; AVX2-NEXT: movswl %ax, %ecx
; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: cwtl
; AVX2-NEXT: vmovd %eax, %xmm4
@ -425,6 +439,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX512-NEXT: movswl %cx, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm9
; AVX512-NEXT: movswl %ax, %ecx
; AVX512-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX512-NEXT: shrl $16, %eax
; AVX512-NEXT: cwtl
; AVX512-NEXT: vmovd %eax, %xmm11
@ -439,6 +454,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX512-NEXT: movswl %cx, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm14
; AVX512-NEXT: movswl %ax, %ecx
; AVX512-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX512-NEXT: shrl $16, %eax
; AVX512-NEXT: cwtl
; AVX512-NEXT: vmovd %eax, %xmm15
@ -453,6 +469,7 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) {
; AVX512-NEXT: movswl %cx, %ecx
; AVX512-NEXT: vmovd %ecx, %xmm1
; AVX512-NEXT: movswl %ax, %ecx
; AVX512-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; AVX512-NEXT: shrl $16, %eax
; AVX512-NEXT: cwtl
; AVX512-NEXT: vmovd %eax, %xmm4
@ -558,6 +575,7 @@ define <4 x float> @load_cvt_8i16_to_4f32(<8 x i16>* %a0) {
; ALL-NEXT: movq %rax, %rcx
; ALL-NEXT: movq %rax, %rdx
; ALL-NEXT: movswl %ax, %esi
; ALL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; ALL-NEXT: shrl $16, %eax
; ALL-NEXT: shrq $32, %rcx
; ALL-NEXT: shrq $48, %rdx
@ -1441,6 +1459,7 @@ define i16 @cvt_f32_to_i16(float %a0) {
; ALL: # BB#0:
; ALL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; ALL-NEXT: vmovd %xmm0, %eax
; ALL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; ALL-NEXT: retq
%1 = fptrunc float %a0 to half
%2 = bitcast half %1 to i16
@ -2383,6 +2402,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) {
; AVX1-NEXT: movw %ax, %bx
; AVX1-NEXT: shll $16, %ebx
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movzwl %ax, %r14d
@ -2429,6 +2449,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) {
; AVX2-NEXT: movw %ax, %bx
; AVX2-NEXT: shll $16, %ebx
; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movzwl %ax, %r14d
@ -2474,6 +2495,7 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) {
; AVX512-NEXT: movw %ax, %bx
; AVX512-NEXT: shll $16, %ebx
; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %r14d
; AVX512-NEXT: orl %ebx, %r14d
@ -2523,6 +2545,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) {
; AVX1-NEXT: movw %ax, %bx
; AVX1-NEXT: shll $16, %ebx
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movzwl %ax, %r14d
@ -2570,6 +2593,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) {
; AVX2-NEXT: movw %ax, %bx
; AVX2-NEXT: shll $16, %ebx
; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movzwl %ax, %r14d
@ -2616,6 +2640,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) {
; AVX512-NEXT: movw %ax, %bx
; AVX512-NEXT: shll $16, %ebx
; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %r14d
; AVX512-NEXT: orl %ebx, %r14d
@ -2667,6 +2692,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) {
; AVX1-NEXT: movw %ax, %bx
; AVX1-NEXT: shll $16, %ebx
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movzwl %ax, %r14d
@ -2714,6 +2740,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) {
; AVX2-NEXT: movw %ax, %bx
; AVX2-NEXT: shll $16, %ebx
; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movzwl %ax, %r14d
@ -2760,6 +2787,7 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) {
; AVX512-NEXT: movw %ax, %bx
; AVX512-NEXT: shll $16, %ebx
; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %r14d
; AVX512-NEXT: orl %ebx, %r14d
@ -2817,6 +2845,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) {
; AVX1-NEXT: movw %ax, %bx
; AVX1-NEXT: shll $16, %ebx
; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movzwl %ax, %r15d
@ -2842,6 +2871,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) {
; AVX1-NEXT: movw %ax, %bx
; AVX1-NEXT: shll $16, %ebx
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movzwl %ax, %r15d
@ -2897,6 +2927,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) {
; AVX2-NEXT: movw %ax, %bx
; AVX2-NEXT: shll $16, %ebx
; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movzwl %ax, %r15d
@ -2922,6 +2953,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) {
; AVX2-NEXT: movw %ax, %bx
; AVX2-NEXT: shll $16, %ebx
; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movzwl %ax, %r15d
@ -2975,6 +3007,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) {
; AVX512-NEXT: movw %ax, %bx
; AVX512-NEXT: shll $16, %ebx
; AVX512-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %r15d
; AVX512-NEXT: orl %ebx, %r15d
@ -2999,6 +3032,7 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) {
; AVX512-NEXT: movw %ax, %bx
; AVX512-NEXT: shll $16, %ebx
; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %r15d
; AVX512-NEXT: orl %ebx, %r15d
@ -3126,6 +3160,7 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) {
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movl %eax, %r15d
; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movl %eax, %ebp
@ -3181,6 +3216,7 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) {
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movl %eax, %r15d
; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movl %eax, %ebp
@ -3234,6 +3270,7 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) {
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %r15d
; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %ebp
; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
@ -3283,6 +3320,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) {
; AVX1-NEXT: movw %ax, %bp
; AVX1-NEXT: shll $16, %ebp
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movzwl %ax, %ebx
@ -3338,6 +3376,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) {
; AVX2-NEXT: movw %ax, %bp
; AVX2-NEXT: shll $16, %ebp
; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movzwl %ax, %ebx
@ -3392,6 +3431,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) {
; AVX512-NEXT: movw %ax, %bp
; AVX512-NEXT: shll $16, %ebp
; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %ebx
; AVX512-NEXT: orl %ebp, %ebx
@ -3452,6 +3492,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) {
; AVX1-NEXT: movw %ax, %bp
; AVX1-NEXT: shll $16, %ebp
; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movzwl %ax, %ebx
@ -3507,6 +3548,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) {
; AVX2-NEXT: movw %ax, %bp
; AVX2-NEXT: shll $16, %ebp
; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movzwl %ax, %ebx
@ -3561,6 +3603,7 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) {
; AVX512-NEXT: movw %ax, %bp
; AVX512-NEXT: shll $16, %ebp
; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %ebx
; AVX512-NEXT: orl %ebp, %ebx
@ -3655,6 +3698,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) {
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movl %eax, %r13d
; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movl %eax, %ebp
@ -3662,6 +3706,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) {
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movl %eax, %r14d
; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movl %eax, %r15d
@ -3748,6 +3793,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) {
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movl %eax, %r13d
; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movl %eax, %ebp
@ -3755,6 +3801,7 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) {
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movl %eax, %r14d
; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movl %eax, %r15d
@ -3838,12 +3885,14 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) {
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %r13d
; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %ebp
; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %r14d
; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %r15d
; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload

View File

@ -1494,6 +1494,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %cl, %al
; AVX512BW-NEXT: movb $7, %dil
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %dl
; AVX512BW-NEXT: movzbl %dl, %edx
@ -1506,6 +1507,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %cl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %cl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %sil
; AVX512BW-NEXT: movzbl %sil, %eax
@ -1520,6 +1522,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1533,6 +1536,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1546,6 +1550,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1559,6 +1564,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1572,6 +1578,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1585,6 +1592,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1598,6 +1606,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1611,6 +1620,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1624,6 +1634,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1637,6 +1648,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1650,6 +1662,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1663,6 +1676,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1676,6 +1690,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1689,6 +1704,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1703,6 +1719,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %esi
@ -1715,6 +1732,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %cl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %cl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %dl
; AVX512BW-NEXT: movzbl %dl, %eax
@ -1729,6 +1747,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1742,6 +1761,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1755,6 +1775,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1768,6 +1789,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1781,6 +1803,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1794,6 +1817,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1807,6 +1831,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1820,6 +1845,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1833,6 +1859,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1846,6 +1873,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1859,6 +1887,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1872,6 +1901,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1885,6 +1915,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1898,6 +1929,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1913,6 +1945,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %esi
@ -1925,6 +1958,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %cl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %cl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %dl
; AVX512BW-NEXT: movzbl %dl, %eax
@ -1939,6 +1973,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1952,6 +1987,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1965,6 +2001,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1978,6 +2015,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -1991,6 +2029,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2004,6 +2043,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2017,6 +2057,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2030,6 +2071,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2043,6 +2085,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2056,6 +2099,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2069,6 +2113,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2082,6 +2127,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2095,6 +2141,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2108,6 +2155,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2121,6 +2169,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %esi
@ -2133,6 +2182,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %cl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %cl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %dl
; AVX512BW-NEXT: movzbl %dl, %eax
@ -2147,6 +2197,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2160,6 +2211,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2173,6 +2225,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2186,6 +2239,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2199,6 +2253,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2212,6 +2267,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2225,6 +2281,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2238,6 +2295,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2251,6 +2309,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2264,6 +2323,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2277,6 +2337,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2290,6 +2351,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2303,6 +2365,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax
@ -2316,6 +2379,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
; AVX512BW-NEXT: shrb $7, %dl
; AVX512BW-NEXT: sarb $2, %al
; AVX512BW-NEXT: addb %dl, %al
; AVX512BW-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: mulb %dil
; AVX512BW-NEXT: subb %al, %cl
; AVX512BW-NEXT: movzbl %cl, %eax

View File

@ -105,7 +105,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
;
; AVX512CD-LABEL: testv2i64:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
;
; X32-SSE-LABEL: testv2i64:
@ -221,7 +223,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
;
; AVX512CD-LABEL: testv2i64u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
;
; X32-SSE-LABEL: testv2i64u:
@ -408,7 +412,9 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
;
; AVX512CD-LABEL: testv4i32:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
;
; X32-SSE-LABEL: testv4i32:
@ -571,7 +577,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
;
; AVX512CD-LABEL: testv4i32u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
;
; X32-SSE-LABEL: testv4i32u:

View File

@ -70,7 +70,9 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
;
; AVX512CD-LABEL: testv4i64:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 0)
@ -138,7 +140,9 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
;
; AVX512CD-LABEL: testv4i64u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntq %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %in, i1 -1)
@ -220,7 +224,9 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
;
; AVX512CD-LABEL: testv8i32:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 0)
@ -293,7 +299,9 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
;
; AVX512CD-LABEL: testv8i32u:
; AVX512CD: ## BB#0:
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
; AVX512CD-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512CD-NEXT: retq
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %in, i1 -1)

View File

@ -786,6 +786,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) {
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
@ -967,6 +968,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) {
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
@ -1161,6 +1163,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) {
; AVX512-NEXT: movzbl (%rdi), %eax
; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: load_sext_4i1_to_4i64:

View File

@ -311,6 +311,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -323,7 +324,10 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v8i16:
@ -1218,6 +1222,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; AVX2-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -1230,8 +1235,10 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v8i16:

View File

@ -214,7 +214,10 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = ashr <16 x i16> %a, %b
ret <16 x i16> %shift
@ -818,8 +821,10 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512-NEXT: vpsravw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift

View File

@ -280,6 +280,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -292,7 +293,10 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v8i16:
@ -952,6 +956,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
; AVX2-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -964,8 +969,10 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v8i16:

View File

@ -191,7 +191,10 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = lshr <16 x i16> %a, %b
ret <16 x i16> %shift
@ -673,8 +676,10 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift

@ -237,6 +237,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -247,7 +248,10 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: var_shift_v8i16:
@ -837,8 +841,10 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v8i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
;
; X32-SSE-LABEL: constant_shift_v8i16:

@ -166,7 +166,10 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
;
; AVX512-LABEL: var_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = shl <16 x i16> %a, %b
ret <16 x i16> %shift
@ -585,8 +588,10 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
;
; AVX512-LABEL: constant_shift_v16i16:
; AVX512: ## BB#0:
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
%shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
ret <16 x i16> %shift

@ -1223,12 +1223,14 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; AVX1-LABEL: insert_reg_and_zero_v4f64:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX2-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX2-NEXT: retq

@ -73,6 +73,7 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT: retq
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@ -96,6 +97,7 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT: retq
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
@ -121,6 +123,7 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
@ -142,6 +145,7 @@ define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
@ -165,6 +169,7 @@ define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
; CHECK-NEXT: retq
%1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@ -175,6 +180,7 @@ define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK: # BB#0:
; CHECK-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT: retq
%1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>

@ -1,4 +1,3 @@
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512dq| FileCheck %s --check-prefix=VL_BW_DQ
@ -200,6 +199,7 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_10_2_9_u_3_u_2_u:
@ -212,6 +212,7 @@ define i8 @shuf8i1_10_2_9_u_3_u_2_u(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector < 8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 10, i32 2, i32 9, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
@ -228,6 +229,7 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_0_1_4_5_u_u_u_u:
@ -238,6 +240,7 @@ define i8 @shuf8i1_0_1_4_5_u_u_u_u(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector < 8 x i1> %b, <8 x i1> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
@ -256,6 +259,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_0_3_7_7_0:
@ -268,6 +272,7 @@ define i8 @shuf8i1_9_6_1_0_3_7_7_0(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector <8 x i1> %b, <8 x i1> zeroinitializer, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
@ -286,6 +291,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0:
@ -298,6 +304,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector <8 x i1> zeroinitializer, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 10, i32 3, i32 7, i32 7, i32 0>
@ -319,6 +326,7 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1__9_6_1_10_3_7_7_1:
@ -333,6 +341,7 @@ define i8 @shuf8i1__9_6_1_10_3_7_7_1(i8 %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x i1> %b, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 1>
@ -353,6 +362,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; AVX512F-NEXT: vpsllq $63, %zmm2, %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf8i1_9_6_1_10_3_7_7_0_all_ones:
@ -366,6 +376,7 @@ define i8 @shuf8i1_9_6_1_10_3_7_7_0_all_ones(<8 x i1> %a) {
; VL_BW_DQ-NEXT: vpsllq $63, %zmm2, %zmm0
; VL_BW_DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovb %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%c = shufflevector <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1> %a, <8 x i32> <i32 9, i32 6, i32 1, i32 0, i32 3, i32 7, i32 7, i32 0>
%c1 = bitcast <8 x i1>%c to i8
@ -382,6 +393,7 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0:
@ -392,6 +404,7 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
; VL_BW_DQ-NEXT: vpslld $31, %zmm0, %zmm0
; VL_BW_DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
; VL_BW_DQ-NEXT: kmovw %k0, %eax
; VL_BW_DQ-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; VL_BW_DQ-NEXT: retq
%b = bitcast i16 %a to <16 x i1>
%c = shufflevector < 16 x i1> %b, <16 x i1> undef, <16 x i32> zeroinitializer
@ -400,6 +413,40 @@ define i16 @shuf16i1_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0(i16 %a) {
}
define i64 @shuf64i1_zero(i64 %a) {
; AVX512F-LABEL: shuf64i1_zero:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .Ltmp0:
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .Ltmp1:
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .Ltmp2:
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $96, %rsp
; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, (%rsp)
; AVX512F-NEXT: movl (%rsp), %ecx
; AVX512F-NEXT: movq %rcx, %rax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; VL_BW_DQ-LABEL: shuf64i1_zero:
; VL_BW_DQ: # BB#0:
; VL_BW_DQ-NEXT: kmovq %rdi, %k0

@ -198,6 +198,12 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i16 %i1, i16 %i2, i16 %i3, i16 %i4, i16 %i5, i16 %i6, i16 %i7) nounwind {
; SSE2-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
; SSE2: # BB#0:
; SSE2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-NEXT: movswq %di, %rax
; SSE2-NEXT: movswq %si, %rsi
; SSE2-NEXT: movswq %dx, %rdx
@ -234,6 +240,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
;
; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSSE3-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSSE3-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSSE3-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSSE3-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSSE3-NEXT: movswq %di, %rax
; SSSE3-NEXT: movswq %si, %rsi
; SSSE3-NEXT: movswq %dx, %rdx
@ -271,6 +283,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
; SSE41-LABEL: var_shuffle_v8i16_v8i16_xxxxxxxx_i16:
; SSE41: # BB#0:
; SSE41-NEXT: pushq %rbx
; SSE41-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE41-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE41-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE41-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE41-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE41-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE41-NEXT: movswq %di, %rax
; SSE41-NEXT: movswq %si, %rbx
; SSE41-NEXT: movswq %dx, %r11
@ -298,6 +316,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
; AVX: # BB#0:
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
; AVX-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; AVX-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; AVX-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; AVX-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX-NEXT: movswq %di, %r10
; AVX-NEXT: movswq %si, %r11
; AVX-NEXT: movswq %dx, %r14
@ -343,6 +367,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xxxxxxxx_i16(<8 x i16> %x, i16 %i0, i1
define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %i0, i8 %i1, i8 %i2, i8 %i3, i8 %i4, i8 %i5, i8 %i6, i8 %i7, i8 %i8, i8 %i9, i8 %i10, i8 %i11, i8 %i12, i8 %i13, i8 %i14, i8 %i15) nounwind {
; SSE2-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
; SSE2: # BB#0:
; SSE2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movsbq {{[0-9]+}}(%rsp), %r10
; SSE2-NEXT: leaq -{{[0-9]+}}(%rsp), %r11
@ -412,6 +442,12 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
;
; SSSE3-LABEL: var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSSE3-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSSE3-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSSE3-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSSE3-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSSE3-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSSE3-NEXT: movsbq {{[0-9]+}}(%rsp), %r10
; SSSE3-NEXT: leaq -{{[0-9]+}}(%rsp), %r11
@ -487,6 +523,12 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; SSE41-NEXT: pushq %r13
; SSE41-NEXT: pushq %r12
; SSE41-NEXT: pushq %rbx
; SSE41-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE41-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE41-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE41-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE41-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE41-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE41-NEXT: movsbq %dil, %r15
; SSE41-NEXT: movsbq %sil, %r14
; SSE41-NEXT: movsbq %dl, %r11
@ -548,6 +590,12 @@ define <16 x i8> @var_shuffle_v16i8_v16i8_xxxxxxxxxxxxxxxx_i8(<16 x i8> %x, i8 %
; AVX-NEXT: pushq %r13
; AVX-NEXT: pushq %r12
; AVX-NEXT: pushq %rbx
; AVX-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; AVX-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; AVX-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; AVX-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX-NEXT: movsbq %dil, %r10
; AVX-NEXT: movsbq %sil, %r11
; AVX-NEXT: movsbq %dl, %r14
@ -1097,6 +1145,12 @@ define <4 x float> @var_shuffle_v4f32_v4f32_x0yx_i32(<4 x float> %x, <4 x float>
define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %y, i16 %i0, i16 %i1, i16 %i2, i16 %i3, i16 %i4, i16 %i5, i16 %i6, i16 %i7) nounwind {
; SSE2-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; SSE2: # BB#0:
; SSE2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-NEXT: movswq %di, %r10
; SSE2-NEXT: movswq %si, %rsi
; SSE2-NEXT: movswq %dx, %r11
@ -1130,6 +1184,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
;
; SSSE3-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; SSSE3: # BB#0:
; SSSE3-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSSE3-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSSE3-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSSE3-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSSE3-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSSE3-NEXT: movswq %di, %r10
; SSSE3-NEXT: movswq %si, %rsi
; SSSE3-NEXT: movswq %dx, %r11
@ -1163,6 +1223,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
;
; SSE41-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; SSE41: # BB#0:
; SSE41-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; SSE41-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; SSE41-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; SSE41-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; SSE41-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; SSE41-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE41-NEXT: movswq %di, %rax
; SSE41-NEXT: movswq %si, %rsi
; SSE41-NEXT: movswq %dx, %rdx
@ -1184,6 +1250,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
;
; AVX1-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; AVX1-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; AVX1-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX1-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT: movswq %di, %r10
; AVX1-NEXT: movswq %si, %r11
; AVX1-NEXT: movswq %dx, %rdx
@ -1205,6 +1277,12 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
;
; AVX2-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %R9D<def> %R9D<kill> %R9<def>
; AVX2-NEXT: # kill: %R8D<def> %R8D<kill> %R8<def>
; AVX2-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def>
; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<def>
; AVX2-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT: movswq %di, %r10
; AVX2-NEXT: movswq %si, %r11
; AVX2-NEXT: movswq %dx, %rdx

@ -36,6 +36,7 @@ define <4 x i32> @trunc_add_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -43,6 +44,7 @@ define <4 x i32> @trunc_add_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = add <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -105,6 +107,7 @@ define <8 x i16> @trunc_add_v8i64_8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -148,6 +151,7 @@ define <8 x i16> @trunc_add_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -155,6 +159,7 @@ define <8 x i16> @trunc_add_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = add <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -379,6 +384,7 @@ define <16 x i8> @trunc_add_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = add <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -421,6 +427,7 @@ define <4 x i32> @trunc_add_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -428,6 +435,7 @@ define <4 x i32> @trunc_add_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = add <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -495,6 +503,7 @@ define <8 x i16> @trunc_add_const_v16i64_v16i16(<8 x i64> %a0) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -537,6 +546,7 @@ define <8 x i16> @trunc_add_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -544,6 +554,7 @@ define <8 x i16> @trunc_add_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = add <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -767,6 +778,7 @@ define <16 x i8> @trunc_add_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpaddw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = add <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -804,6 +816,7 @@ define <4 x i32> @trunc_sub_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -811,6 +824,7 @@ define <4 x i32> @trunc_sub_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = sub <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -873,6 +887,7 @@ define <8 x i16> @trunc_sub_v8i64_8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -916,6 +931,7 @@ define <8 x i16> @trunc_sub_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -923,6 +939,7 @@ define <8 x i16> @trunc_sub_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = sub <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -1147,6 +1164,7 @@ define <16 x i8> @trunc_sub_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = sub <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -1189,6 +1207,7 @@ define <4 x i32> @trunc_sub_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX2-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -1196,6 +1215,7 @@ define <4 x i32> @trunc_sub_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = sub <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -1262,6 +1282,7 @@ define <8 x i16> @trunc_sub_const_v16i64_v16i16(<8 x i64> %a0) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -1304,6 +1325,7 @@ define <8 x i16> @trunc_sub_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX2-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -1311,6 +1333,7 @@ define <8 x i16> @trunc_sub_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = sub <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -1534,6 +1557,7 @@ define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = sub <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -1615,6 +1639,7 @@ define <4 x i32> @trunc_mul_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -1630,6 +1655,7 @@ define <4 x i32> @trunc_mul_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX512-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = mul <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -1780,6 +1806,7 @@ define <8 x i16> @trunc_mul_v8i64_8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -1843,6 +1870,7 @@ define <8 x i16> @trunc_mul_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -1850,6 +1878,7 @@ define <8 x i16> @trunc_mul_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = mul <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -2298,6 +2327,7 @@ define <16 x i8> @trunc_mul_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = mul <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -2365,6 +2395,7 @@ define <4 x i32> @trunc_mul_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -2377,6 +2408,7 @@ define <4 x i32> @trunc_mul_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX512-NEXT: vpsllq $32, %ymm0, %ymm0
; AVX512-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = mul <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -2495,6 +2527,7 @@ define <8 x i16> @trunc_mul_const_v16i64_v16i16(<8 x i64> %a0) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -2556,6 +2589,7 @@ define <8 x i16> @trunc_mul_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX2-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -2563,6 +2597,7 @@ define <8 x i16> @trunc_mul_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = mul <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -2930,6 +2965,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = mul <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -2965,6 +3001,7 @@ define <4 x i32> @trunc_and_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -2972,6 +3009,7 @@ define <4 x i32> @trunc_and_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = and <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -3030,6 +3068,7 @@ define <8 x i16> @trunc_and_v8i64_8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -3071,6 +3110,7 @@ define <8 x i16> @trunc_and_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -3078,6 +3118,7 @@ define <8 x i16> @trunc_and_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = and <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -3288,6 +3329,7 @@ define <16 x i8> @trunc_and_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = and <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -3326,6 +3368,7 @@ define <4 x i32> @trunc_and_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -3333,6 +3376,7 @@ define <4 x i32> @trunc_and_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = and <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -3395,6 +3439,7 @@ define <8 x i16> @trunc_and_const_v16i64_v16i16(<8 x i64> %a0) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -3436,6 +3481,7 @@ define <8 x i16> @trunc_and_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -3443,6 +3489,7 @@ define <8 x i16> @trunc_and_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = and <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -3656,6 +3703,7 @@ define <16 x i8> @trunc_and_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = and <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -3691,6 +3739,7 @@ define <4 x i32> @trunc_xor_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -3698,6 +3747,7 @@ define <4 x i32> @trunc_xor_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = xor <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -3756,6 +3806,7 @@ define <8 x i16> @trunc_xor_v8i64_8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -3797,6 +3848,7 @@ define <8 x i16> @trunc_xor_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -3804,6 +3856,7 @@ define <8 x i16> @trunc_xor_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = xor <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -4014,6 +4067,7 @@ define <16 x i8> @trunc_xor_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = xor <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -4052,6 +4106,7 @@ define <4 x i32> @trunc_xor_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4059,6 +4114,7 @@ define <4 x i32> @trunc_xor_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = xor <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -4121,6 +4177,7 @@ define <8 x i16> @trunc_xor_const_v16i64_v16i16(<8 x i64> %a0) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4162,6 +4219,7 @@ define <8 x i16> @trunc_xor_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX2-NEXT: vpxor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4169,6 +4227,7 @@ define <8 x i16> @trunc_xor_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = xor <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -4382,6 +4441,7 @@ define <16 x i8> @trunc_xor_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = xor <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -4417,6 +4477,7 @@ define <4 x i32> @trunc_or_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4424,6 +4485,7 @@ define <4 x i32> @trunc_or_v4i64_4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = or <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -4482,6 +4544,7 @@ define <8 x i16> @trunc_or_v8i64_8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4523,6 +4586,7 @@ define <8 x i16> @trunc_or_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4530,6 +4594,7 @@ define <8 x i16> @trunc_or_v8i32_8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = or <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -4740,6 +4805,7 @@ define <16 x i8> @trunc_or_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = or <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -4778,6 +4844,7 @@ define <4 x i32> @trunc_or_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4785,6 +4852,7 @@ define <4 x i32> @trunc_or_const_v4i64_4i32(<4 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = or <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@ -4847,6 +4915,7 @@ define <8 x i16> @trunc_or_const_v16i64_v16i16(<8 x i64> %a0) nounwind {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4888,6 +4957,7 @@ define <8 x i16> @trunc_or_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX2-NEXT: vpor {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -4895,6 +4965,7 @@ define <8 x i16> @trunc_or_const_v16i32_v16i16(<8 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = or <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@ -5108,6 +5179,7 @@ define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vorps {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
%1 = or <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@ -5213,6 +5285,7 @@ define <4 x i32> @mul_add_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; AVX2-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,2,4,6,4,6]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -5231,6 +5304,7 @@ define <4 x i32> @mul_add_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; AVX512-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: retq
%1 = sext <4 x i32> %a0 to <4 x i64>
%2 = sext <4 x i32> %a1 to <4 x i64>

@ -143,6 +143,7 @@ define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@ -251,12 +252,15 @@ define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512BW-LABEL: trunc8i32_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
entry:
%0 = trunc <8 x i32> %a to <8 x i16>
@ -314,6 +318,7 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
;
; AVX512BW-LABEL: trunc8i32_8i8:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX512BW-NEXT: vmovq %xmm0, (%rax)
@ -434,6 +439,8 @@ define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
;
; AVX512BW-LABEL: trunc2x4i64_8i32:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
@ -534,6 +541,8 @@ define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
;
; AVX512BW-LABEL: trunc2x4i64_8i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]

@ -141,6 +141,9 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: and_v3i8_as_i24:
@ -155,6 +158,9 @@ define <3 x i8> @and_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
@ -176,6 +182,9 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: xor_v3i8_as_i24:
@ -190,6 +199,9 @@ define <3 x i8> @xor_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24
@ -211,6 +223,9 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X32-SSE-NEXT: pextrb $0, %xmm1, %eax
; X32-SSE-NEXT: pextrb $4, %xmm1, %edx
; X32-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X32-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X32-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: or_v3i8_as_i24:
@ -225,6 +240,9 @@ define <3 x i8> @or_v3i8_as_i24(<3 x i8> %a, <3 x i8> %b) nounwind {
; X64-SSE-NEXT: pextrb $0, %xmm1, %eax
; X64-SSE-NEXT: pextrb $4, %xmm1, %edx
; X64-SSE-NEXT: pextrb $8, %xmm1, %ecx
; X64-SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-SSE-NEXT: # kill: %DL<def> %DL<kill> %EDX<kill>
; X64-SSE-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill>
; X64-SSE-NEXT: retq
%1 = bitcast <3 x i8> %a to i24
%2 = bitcast <3 x i8> %b to i24

@ -185,7 +185,7 @@ attributes #2 = { "no-frame-pointer-elim"="false" nounwind }
; CHECK-NEXT: je [[STRINGS_EQUAL]]
;
; CHECK: [[STRINGS_EQUAL]]
; CHECK-NEXT: popq
; CHECK: popq
define zeroext i1 @segmentedStack(i8* readonly %vk1, i8* readonly %vk2, i64 %key_size) #5 {
entry:
%cmp.i = icmp eq i8* %vk1, null

@ -746,6 +746,7 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill
; KNL-NEXT: retq
%mul = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %c1, i64 160)
%mul.overflow = extractvalue { i64, i1 } %mul, 1