forked from OSchip/llvm-project
[X86] Teach X86FixupBWInsts to promote MOV8rr/MOV16rr to MOV32rr.
Code size is smaller (for 16-bit copies) or equal (for 8-bit copies), and we avoid partial register dependencies. Differential Revision: http://reviews.llvm.org/D19999 llvm-svn: 268760
This commit is contained in:
parent
d97dd11f2f
commit
258426ca7a
|
@ -93,6 +93,11 @@ class FixupBWInstPass : public MachineFunctionPass {
|
|||
/// OK, otherwise return nullptr.
|
||||
MachineInstr *tryReplaceLoad(unsigned New32BitOpcode, MachineInstr *MI) const;
|
||||
|
||||
/// Change the MachineInstr \p MI into the equivalent 32-bit copy if it is
|
||||
/// safe to do so. Return the replacement instruction if OK, otherwise return
|
||||
/// nullptr.
|
||||
MachineInstr *tryReplaceCopy(MachineInstr *MI) const;
|
||||
|
||||
public:
|
||||
FixupBWInstPass() : MachineFunctionPass(ID) {}
|
||||
|
||||
|
@ -219,6 +224,39 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
|
|||
return MIB;
|
||||
}
|
||||
|
||||
/// Try to build a 32-bit register copy (MOV32rr) equivalent to the 8/16-bit
/// register copy \p MI.
///
/// Returns nullptr when getSuperRegDestIfDead() rejects the destination, or
/// when source and destination are not the same sub-register of their
/// respective 32-bit super-registers. Otherwise returns the newly built
/// instruction, with the implicit operands of \p MI carried over.
MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
  assert(MI->getNumExplicitOperands() == 2);
  auto &OldDest = MI->getOperand(0);
  auto &OldSrc = MI->getOperand(1);

  unsigned NewDestReg;
  if (!getSuperRegDestIfDead(MI, NewDestReg))
    return nullptr;

  unsigned NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);

  // This is only correct if we access the same subregister index: otherwise,
  // we could try to replace "movb %ah, %al" with "movl %eax, %eax".
  auto *TRI = &TII->getRegisterInfo();
  if (TRI->getSubRegIndex(NewSrcReg, OldSrc.getReg()) !=
      TRI->getSubRegIndex(NewDestReg, OldDest.getReg()))
    return nullptr;

  // Safe to change the instruction.
  // Don't set src flags, as we don't know if we're also killing the superreg.
  // However, the super-register might not be fully defined: make it explicit
  // that we don't care about its upper bits by reading it as Undef, and keep
  // the real dependency with an implicit use of the original sub-register.
  MachineInstrBuilder MIB =
      BuildMI(*MF, MI->getDebugLoc(), TII->get(X86::MOV32rr), NewDestReg)
          .addReg(NewSrcReg, RegState::Undef)
          .addReg(OldSrc.getReg(), RegState::Implicit);

  // Forward the original implicit operands so they are not lost (previously
  // they were dropped, and merely asserted on in +Asserts builds). Skip only
  // the ones made redundant by the new explicit operands: an imp-def of the
  // new destination, or an imp-use of the new source.
  for (auto &Op : MI->implicit_operands())
    if (Op.getReg() != (Op.isDef() ? NewDestReg : NewSrcReg))
      MIB.addOperand(Op);

  return MIB;
}
|
||||
|
||||
void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) {
|
||||
|
||||
|
@ -265,6 +303,15 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
|
|||
NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI);
|
||||
break;
|
||||
|
||||
case X86::MOV8rr:
|
||||
case X86::MOV16rr:
|
||||
// Always try to replace 8/16 bit copies with a 32 bit copy.
|
||||
// Code size is either less (16) or equal (8), and there is sometimes a
|
||||
// perf advantage from eliminating a false dependence on the upper portion
|
||||
// of the register.
|
||||
NewMI = tryReplaceCopy(MI);
|
||||
break;
|
||||
|
||||
default:
|
||||
// nothing to do here.
|
||||
break;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
define i8 @f(i8 %v1, i8 %v2) nounwind {
|
||||
entry:
|
||||
; CHECK: callq
|
||||
; CHECK: movb %{{.*}}, %al
|
||||
; CHECK: movl %{{.*}}, %eax
|
||||
; CHECK: mulb
|
||||
; CHECK: mulb
|
||||
%rval = tail call i8 @bar() nounwind
|
||||
|
|
|
@ -39,7 +39,7 @@ define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
|
|||
; X64-LABEL: bar:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: xorl %edx, %edx
|
||||
; X64-NEXT: movw %di, %ax
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: divw %si
|
||||
; X64-NEXT: andl $1, %eax
|
||||
; X64-NEXT: retq
|
||||
|
|
|
@ -461,7 +461,7 @@ define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
|
|||
; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; KNL_X32-NEXT: movl %edi, (%esp)
|
||||
; KNL_X32-NEXT: calll _test11
|
||||
; KNL_X32-NEXT: movb %al, %bl
|
||||
; KNL_X32-NEXT: movl %eax, %ebx
|
||||
; KNL_X32-NEXT: movzbl %bl, %eax
|
||||
; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
|
||||
|
|
|
@ -81,7 +81,7 @@ define i16 @mand16(i16 %x, i16 %y) {
|
|||
; CHECK-NEXT: xorl %esi, %eax
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: orl %eax, %edi
|
||||
; CHECK-NEXT: movw %di, %ax
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%ma = bitcast i16 %x to <16 x i1>
|
||||
%mb = bitcast i16 %y to <16 x i1>
|
||||
|
|
|
@ -72,7 +72,7 @@ define i8 @select05(i8 %a.0, i8 %m) {
|
|||
; CHECK-LABEL: select05:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: orl %esi, %edi
|
||||
; CHECK-NEXT: movb %dil, %al
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%mask = bitcast i8 %m to <8 x i1>
|
||||
%a = bitcast i8 %a.0 to <8 x i1>
|
||||
|
@ -102,7 +102,7 @@ define i8 @select06(i8 %a.0, i8 %m) {
|
|||
; CHECK-LABEL: select06:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: movb %dil, %al
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%mask = bitcast i8 %m to <8 x i1>
|
||||
%a = bitcast i8 %a.0 to <8 x i1>
|
||||
|
|
|
@ -36,7 +36,7 @@ define i8 @mand8(i8 %x, i8 %y) {
|
|||
; CHECK-NEXT: xorl %esi, %eax
|
||||
; CHECK-NEXT: andl %esi, %edi
|
||||
; CHECK-NEXT: orl %eax, %edi
|
||||
; CHECK-NEXT: movb %dil, %al
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%ma = bitcast i8 %x to <8 x i1>
|
||||
%mb = bitcast i8 %y to <8 x i1>
|
||||
|
|
|
@ -250,14 +250,14 @@ attributes #0 = { nounwind }
|
|||
; CMOV-DAG: movb $20, %al
|
||||
; CMOV-DAG: movb $20, %dl
|
||||
; CMOV: jl [[BB0:.LBB[0-9_]+]]
|
||||
; CMOV: movb %cl, %dl
|
||||
; CMOV: movl %ecx, %edx
|
||||
; CMOV: [[BB0]]:
|
||||
; CMOV: jg [[BB1:.LBB[0-9_]+]]
|
||||
; CMOV: movb %dl, %al
|
||||
; CMOV: movl %edx, %eax
|
||||
; CMOV: [[BB1]]:
|
||||
; CMOV: testl %edi, %edi
|
||||
; CMOV: je [[BB2:.LBB[0-9_]+]]
|
||||
; CMOV: movb %dl, %al
|
||||
; CMOV: movl %edx, %eax
|
||||
; CMOV: [[BB2]]:
|
||||
; CMOV: movb %al, g8(%rip)
|
||||
; CMOV: retq
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
|
||||
; RUN: llc -fixup-byte-word-insts=1 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWON64 %s
|
||||
; RUN: llc -fixup-byte-word-insts=0 -mtriple=x86_64-- < %s | FileCheck --check-prefix=X64 --check-prefix=BWOFF64 %s
|
||||
; RUN: llc -fixup-byte-word-insts=1 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWON32 %s
|
||||
; RUN: llc -fixup-byte-word-insts=0 -mtriple=i386-- < %s | FileCheck --check-prefix=X32 --check-prefix=BWOFF32 %s
|
||||
|
||||
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
|
||||
|
||||
define i8 @test_movb(i8 %a0) {
|
||||
; BWON64-LABEL: test_movb:
|
||||
; BWON64: # BB#0:
|
||||
; BWON64-NEXT: movl %edi, %eax
|
||||
; BWON64-NEXT: retq
|
||||
;
|
||||
; BWOFF64-LABEL: test_movb:
|
||||
; BWOFF64: # BB#0:
|
||||
; BWOFF64-NEXT: movb %dil, %al
|
||||
; BWOFF64-NEXT: retq
|
||||
;
|
||||
; X32-LABEL: test_movb:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
|
||||
; X32-NEXT: retl
|
||||
ret i8 %a0
|
||||
}
|
||||
|
||||
define i16 @test_movw(i16 %a0) {
|
||||
; BWON64-LABEL: test_movw:
|
||||
; BWON64: # BB#0:
|
||||
; BWON64-NEXT: movl %edi, %eax
|
||||
; BWON64-NEXT: retq
|
||||
;
|
||||
; BWOFF64-LABEL: test_movw:
|
||||
; BWOFF64: # BB#0:
|
||||
; BWOFF64-NEXT: movw %di, %ax
|
||||
; BWOFF64-NEXT: retq
|
||||
;
|
||||
; BWON32-LABEL: test_movw:
|
||||
; BWON32: # BB#0:
|
||||
; BWON32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; BWON32-NEXT: retl
|
||||
;
|
||||
; BWOFF32-LABEL: test_movw:
|
||||
; BWOFF32: # BB#0:
|
||||
; BWOFF32-NEXT: movw {{[0-9]+}}(%esp), %ax
|
||||
; BWOFF32-NEXT: retl
|
||||
ret i16 %a0
|
||||
}
|
||||
|
||||
; Verify we don't mess with H-reg copies (only generated in 32-bit mode).
|
||||
define i8 @test_movb_hreg(i16 %a0) {
|
||||
; X64-LABEL: test_movb_hreg:
|
||||
; X64: # BB#0:
|
||||
; X64-NEXT: movl %edi, %eax
|
||||
; X64-NEXT: shrl $8, %eax
|
||||
; X64-NEXT: addb %dil, %al
|
||||
; X64-NEXT: retq
|
||||
;
|
||||
; X32-LABEL: test_movb_hreg:
|
||||
; X32: # BB#0:
|
||||
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
|
||||
; X32-NEXT: addb %al, %ah
|
||||
; X32-NEXT: movb %ah, %al
|
||||
; X32-NEXT: retl
|
||||
%tmp0 = trunc i16 %a0 to i8
|
||||
%tmp1 = lshr i16 %a0, 8
|
||||
%tmp2 = trunc i16 %tmp1 to i8
|
||||
%tmp3 = add i8 %tmp0, %tmp2
|
||||
ret i8 %tmp3
|
||||
}
|
|
@ -21,7 +21,7 @@ define i32 @foo2(i8 %a) #0 {
|
|||
|
||||
; CHECK-LABEL: bar
|
||||
; CHECK-NOT: cvt
|
||||
; CHECK: movb
|
||||
; CHECK: movl
|
||||
define zeroext i8 @bar(i8 zeroext %a) #0 {
|
||||
%conv = uitofp i8 %a to float
|
||||
%conv1 = fptoui float %conv to i8
|
||||
|
|
|
@ -60,7 +60,7 @@ define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
|
|||
; CHECK-NEXT: subb %sil, %dil
|
||||
; CHECK-NEXT: andb %cl, %dl
|
||||
; CHECK-NEXT: andb %dil, %dl
|
||||
; CHECK-NEXT: movb %dl, %al
|
||||
; CHECK-NEXT: movl %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = sub i8 %x0, %x1
|
||||
%t1 = and i8 %x2, %t0
|
||||
|
@ -107,7 +107,7 @@ define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
|
|||
; CHECK-NEXT: subb %sil, %dil
|
||||
; CHECK-NEXT: orb %cl, %dl
|
||||
; CHECK-NEXT: orb %dil, %dl
|
||||
; CHECK-NEXT: movb %dl, %al
|
||||
; CHECK-NEXT: movl %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = sub i8 %x0, %x1
|
||||
%t1 = or i8 %x2, %t0
|
||||
|
@ -154,7 +154,7 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
|
|||
; CHECK-NEXT: subb %sil, %dil
|
||||
; CHECK-NEXT: xorb %cl, %dl
|
||||
; CHECK-NEXT: xorb %dil, %dl
|
||||
; CHECK-NEXT: movb %dl, %al
|
||||
; CHECK-NEXT: movl %edx, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%t0 = sub i8 %x0, %x1
|
||||
%t1 = xor i8 %x2, %t0
|
||||
|
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
; This test should get one and only one register to register mov.
|
||||
; CHECK-LABEL: t:
|
||||
; CHECK: movw
|
||||
; CHECK-NOT: movw
|
||||
; CHECK: movl
|
||||
; CHECK-NOT: mov
|
||||
; CHECK: ret
|
||||
|
||||
define signext i16 @t() {
|
||||
|
|
|
@ -9,6 +9,6 @@ define i2 @f(i32 %arg) {
|
|||
; CHECK-LABEL: f:
|
||||
; CHECK: addb %dil, %dil
|
||||
; CHECK-NEXT: orb $1, %dil
|
||||
; CHECK-NEXT: movb %dil, %al
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
}
|
||||
|
|
|
@ -9,68 +9,68 @@
|
|||
define i8 @test_bitreverse_i8(i8 %a) nounwind {
|
||||
; SSE-LABEL: test_bitreverse_i8:
|
||||
; SSE: # BB#0:
|
||||
; SSE-NEXT: movb %dil, %al
|
||||
; SSE-NEXT: movl %edi, %eax
|
||||
; SSE-NEXT: shlb $7, %al
|
||||
; SSE-NEXT: movb %dil, %cl
|
||||
; SSE-NEXT: movl %edi, %ecx
|
||||
; SSE-NEXT: shlb $5, %cl
|
||||
; SSE-NEXT: andb $64, %cl
|
||||
; SSE-NEXT: movb %dil, %dl
|
||||
; SSE-NEXT: movl %edi, %edx
|
||||
; SSE-NEXT: shlb $3, %dl
|
||||
; SSE-NEXT: andb $32, %dl
|
||||
; SSE-NEXT: orb %cl, %dl
|
||||
; SSE-NEXT: movb %dil, %cl
|
||||
; SSE-NEXT: movl %edi, %ecx
|
||||
; SSE-NEXT: addb %cl, %cl
|
||||
; SSE-NEXT: andb $16, %cl
|
||||
; SSE-NEXT: orb %dl, %cl
|
||||
; SSE-NEXT: movb %dil, %dl
|
||||
; SSE-NEXT: movl %edi, %edx
|
||||
; SSE-NEXT: shrb %dl
|
||||
; SSE-NEXT: andb $8, %dl
|
||||
; SSE-NEXT: orb %cl, %dl
|
||||
; SSE-NEXT: movb %dil, %cl
|
||||
; SSE-NEXT: movl %edi, %ecx
|
||||
; SSE-NEXT: shrb $3, %cl
|
||||
; SSE-NEXT: andb $4, %cl
|
||||
; SSE-NEXT: orb %dl, %cl
|
||||
; SSE-NEXT: movb %dil, %dl
|
||||
; SSE-NEXT: movl %edi, %edx
|
||||
; SSE-NEXT: shrb $5, %dl
|
||||
; SSE-NEXT: andb $2, %dl
|
||||
; SSE-NEXT: orb %cl, %dl
|
||||
; SSE-NEXT: shrb $7, %dil
|
||||
; SSE-NEXT: orb %dl, %dil
|
||||
; SSE-NEXT: orb %al, %dil
|
||||
; SSE-NEXT: movb %dil, %al
|
||||
; SSE-NEXT: movl %edi, %eax
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: test_bitreverse_i8:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: movb %dil, %al
|
||||
; AVX-NEXT: movl %edi, %eax
|
||||
; AVX-NEXT: shlb $7, %al
|
||||
; AVX-NEXT: movb %dil, %cl
|
||||
; AVX-NEXT: movl %edi, %ecx
|
||||
; AVX-NEXT: shlb $5, %cl
|
||||
; AVX-NEXT: andb $64, %cl
|
||||
; AVX-NEXT: movb %dil, %dl
|
||||
; AVX-NEXT: movl %edi, %edx
|
||||
; AVX-NEXT: shlb $3, %dl
|
||||
; AVX-NEXT: andb $32, %dl
|
||||
; AVX-NEXT: orb %cl, %dl
|
||||
; AVX-NEXT: movb %dil, %cl
|
||||
; AVX-NEXT: movl %edi, %ecx
|
||||
; AVX-NEXT: addb %cl, %cl
|
||||
; AVX-NEXT: andb $16, %cl
|
||||
; AVX-NEXT: orb %dl, %cl
|
||||
; AVX-NEXT: movb %dil, %dl
|
||||
; AVX-NEXT: movl %edi, %edx
|
||||
; AVX-NEXT: shrb %dl
|
||||
; AVX-NEXT: andb $8, %dl
|
||||
; AVX-NEXT: orb %cl, %dl
|
||||
; AVX-NEXT: movb %dil, %cl
|
||||
; AVX-NEXT: movl %edi, %ecx
|
||||
; AVX-NEXT: shrb $3, %cl
|
||||
; AVX-NEXT: andb $4, %cl
|
||||
; AVX-NEXT: orb %dl, %cl
|
||||
; AVX-NEXT: movb %dil, %dl
|
||||
; AVX-NEXT: movl %edi, %edx
|
||||
; AVX-NEXT: shrb $5, %dl
|
||||
; AVX-NEXT: andb $2, %dl
|
||||
; AVX-NEXT: orb %cl, %dl
|
||||
; AVX-NEXT: shrb $7, %dil
|
||||
; AVX-NEXT: orb %dl, %dil
|
||||
; AVX-NEXT: orb %al, %dil
|
||||
; AVX-NEXT: movb %dil, %al
|
||||
; AVX-NEXT: movl %edi, %eax
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: test_bitreverse_i8:
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1323,7 +1323,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $1, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %ecx
|
||||
; AVX512BW-NEXT: shrl $8, %ecx
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %cl, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %cl, %al
|
||||
|
@ -1335,7 +1335,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $0, %xmm1, %esi
|
||||
; AVX512BW-NEXT: imull $37, %esi, %edi
|
||||
; AVX512BW-NEXT: shrl $8, %edi
|
||||
; AVX512BW-NEXT: movb %sil, %al
|
||||
; AVX512BW-NEXT: movl %esi, %eax
|
||||
; AVX512BW-NEXT: subb %dil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %dil, %al
|
||||
|
@ -1348,7 +1348,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $2, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1360,7 +1360,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $3, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1372,7 +1372,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $4, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1384,7 +1384,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $5, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1396,7 +1396,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $6, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1408,7 +1408,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $7, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1420,7 +1420,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $8, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1432,7 +1432,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $9, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1444,7 +1444,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $10, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1456,7 +1456,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $11, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1468,7 +1468,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $12, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1480,7 +1480,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $13, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1492,7 +1492,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $14, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1504,7 +1504,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $15, %xmm1, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1517,7 +1517,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1528,7 +1528,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi
|
||||
; AVX512BW-NEXT: imull $37, %esi, %edi
|
||||
; AVX512BW-NEXT: shrl $8, %edi
|
||||
; AVX512BW-NEXT: movb %sil, %al
|
||||
; AVX512BW-NEXT: movl %esi, %eax
|
||||
; AVX512BW-NEXT: subb %dil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %dil, %al
|
||||
|
@ -1541,7 +1541,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1553,7 +1553,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1565,7 +1565,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1577,7 +1577,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1589,7 +1589,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1601,7 +1601,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1613,7 +1613,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1625,7 +1625,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1637,7 +1637,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1649,7 +1649,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1661,7 +1661,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1673,7 +1673,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1685,7 +1685,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1697,7 +1697,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1711,7 +1711,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $1, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1722,7 +1722,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $0, %xmm2, %esi
|
||||
; AVX512BW-NEXT: imull $37, %esi, %edi
|
||||
; AVX512BW-NEXT: shrl $8, %edi
|
||||
; AVX512BW-NEXT: movb %sil, %al
|
||||
; AVX512BW-NEXT: movl %esi, %eax
|
||||
; AVX512BW-NEXT: subb %dil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %dil, %al
|
||||
|
@ -1735,7 +1735,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $2, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1747,7 +1747,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $3, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1759,7 +1759,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $4, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1771,7 +1771,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $5, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1783,7 +1783,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $6, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1795,7 +1795,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $7, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1807,7 +1807,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $8, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1819,7 +1819,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $9, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1831,7 +1831,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $10, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1843,7 +1843,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $11, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1855,7 +1855,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $12, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1867,7 +1867,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $13, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1879,7 +1879,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $14, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1891,7 +1891,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $15, %xmm2, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1903,7 +1903,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $1, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1914,7 +1914,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $0, %xmm0, %esi
|
||||
; AVX512BW-NEXT: imull $37, %esi, %edi
|
||||
; AVX512BW-NEXT: shrl $8, %edi
|
||||
; AVX512BW-NEXT: movb %sil, %al
|
||||
; AVX512BW-NEXT: movl %esi, %eax
|
||||
; AVX512BW-NEXT: subb %dil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %dil, %al
|
||||
|
@ -1927,7 +1927,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $2, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1939,7 +1939,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $3, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1951,7 +1951,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $4, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1963,7 +1963,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $5, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1975,7 +1975,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $6, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1987,7 +1987,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $7, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -1999,7 +1999,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $8, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -2011,7 +2011,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $9, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -2023,7 +2023,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $10, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -2035,7 +2035,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $11, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -2047,7 +2047,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $12, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -2059,7 +2059,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $13, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -2071,7 +2071,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $14, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
@ -2083,7 +2083,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind {
|
|||
; AVX512BW-NEXT: vpextrb $15, %xmm0, %edx
|
||||
; AVX512BW-NEXT: imull $37, %edx, %esi
|
||||
; AVX512BW-NEXT: shrl $8, %esi
|
||||
; AVX512BW-NEXT: movb %dl, %al
|
||||
; AVX512BW-NEXT: movl %edx, %eax
|
||||
; AVX512BW-NEXT: subb %sil, %al
|
||||
; AVX512BW-NEXT: shrb %al
|
||||
; AVX512BW-NEXT: addb %sil, %al
|
||||
|
|
|
@ -295,7 +295,7 @@ entry:
|
|||
define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
|
||||
entry:
|
||||
; CHECK-LABEL: smulo.i8
|
||||
; CHECK: movb %dil, %al
|
||||
; CHECK: movl %edi, %eax
|
||||
; CHECK-NEXT: imulb %sil
|
||||
; CHECK-NEXT: seto %cl
|
||||
%t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
|
||||
|
@ -345,7 +345,7 @@ entry:
|
|||
define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
|
||||
entry:
|
||||
; CHECK-LABEL: umulo.i8
|
||||
; CHECK: movb %dil, %al
|
||||
; CHECK: movl %edi, %eax
|
||||
; CHECK-NEXT: mulb %sil
|
||||
; CHECK-NEXT: seto %cl
|
||||
%t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
|
||||
|
|
Loading…
Reference in New Issue