forked from OSchip/llvm-project
AVX-512: Optimization for patterns with i1 scalar type
The patch removes redundant kmov instructions (not all of them; there is still a lot of work to do here) and redundant "and" instructions after "setcc". I use an "AssertZext" marker between the X86ISD::SETCC node and "truncate" to eliminate the extra "and $1" instruction. I also changed the zext, anyext and trunc patterns in the .td file, which allows removing extra "kmov" instructions. This patch fixes https://llvm.org/bugs/show_bug.cgi?id=28173. Fast ISel mode is not supported correctly for AVX-512: a scalar ICMP/FCMP instruction should return its result in a k-reg. This will be fixed in one of the next patches. For now, I redirected handling of "cmp" to the DAG builder mode. (The code looks worse in one specific test case, but without this fix the new patch fails.) Differential revision: http://reviews.llvm.org/D21956 llvm-svn: 274613
This commit is contained in:
parent
e40530ea7b
commit
5a4f2476fd
|
@ -1404,6 +1404,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
|
|||
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
|
||||
return false;
|
||||
|
||||
if (I->getType()->isIntegerTy(1) && Subtarget->hasAVX512())
|
||||
return false;
|
||||
|
||||
// Try to optimize or fold the cmp.
|
||||
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
|
||||
unsigned ResultReg = 0;
|
||||
|
|
|
@ -15551,8 +15551,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||
isNullConstant(Op1) &&
|
||||
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
||||
if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) {
|
||||
if (VT == MVT::i1)
|
||||
if (VT == MVT::i1) {
|
||||
NewSetCC = DAG.getNode(ISD::AssertZext, dl, MVT::i8, NewSetCC,
|
||||
DAG.getValueType(MVT::i1));
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC);
|
||||
}
|
||||
return NewSetCC;
|
||||
}
|
||||
}
|
||||
|
@ -15574,8 +15577,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
|
||||
DAG.getConstant(CCode, dl, MVT::i8),
|
||||
Op0.getOperand(1));
|
||||
if (VT == MVT::i1)
|
||||
if (VT == MVT::i1) {
|
||||
SetCC = DAG.getNode(ISD::AssertZext, dl, MVT::i8, SetCC,
|
||||
DAG.getValueType(MVT::i1));
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
|
||||
}
|
||||
return SetCC;
|
||||
}
|
||||
}
|
||||
|
@ -15599,8 +15605,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
|
||||
DAG.getConstant(X86CC, dl, MVT::i8), EFLAGS);
|
||||
if (VT == MVT::i1)
|
||||
if (VT == MVT::i1) {
|
||||
SetCC = DAG.getNode(ISD::AssertZext, dl, MVT::i8, SetCC,
|
||||
DAG.getValueType(MVT::i1));
|
||||
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
|
||||
}
|
||||
return SetCC;
|
||||
}
|
||||
|
||||
|
@ -15619,8 +15628,11 @@ SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const {
|
|||
SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry);
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
|
||||
DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1));
|
||||
if (Op.getSimpleValueType() == MVT::i1)
|
||||
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
|
||||
if (Op.getSimpleValueType() == MVT::i1) {
|
||||
SetCC = DAG.getNode(ISD::AssertZext, DL, MVT::i8, SetCC,
|
||||
DAG.getValueType(MVT::i1));
|
||||
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
|
||||
}
|
||||
return SetCC;
|
||||
}
|
||||
|
||||
|
@ -15650,14 +15662,23 @@ static bool isX86LogicalCmp(SDValue Op) {
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
|
||||
/// Returns the "condition" node, that may be wrapped with "truncate".
|
||||
/// Like this: (i1 (trunc (i8 X86ISD::SETCC))).
|
||||
static SDValue getCondAfterTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
|
||||
if (V.getOpcode() != ISD::TRUNCATE)
|
||||
return false;
|
||||
return V;
|
||||
|
||||
SDValue VOp0 = V.getOperand(0);
|
||||
if (VOp0.getOpcode() == ISD::AssertZext &&
|
||||
V.getValueSizeInBits() ==
|
||||
cast<VTSDNode>(VOp0.getOperand(1))->getVT().getSizeInBits())
|
||||
return VOp0.getOperand(0);
|
||||
|
||||
unsigned InBits = VOp0.getValueSizeInBits();
|
||||
unsigned Bits = V.getValueSizeInBits();
|
||||
return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
|
||||
if (DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)))
|
||||
return V.getOperand(0);
|
||||
return V;
|
||||
}
|
||||
|
||||
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
@ -15880,8 +15901,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
if (addTest) {
|
||||
// Look past the truncate if the high bits are known zero.
|
||||
if (isTruncWithZeroHighBitsInput(Cond, DAG))
|
||||
Cond = Cond.getOperand(0);
|
||||
Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG);
|
||||
|
||||
// We know the result of AND is compared against zero. Try to match
|
||||
// it to BT.
|
||||
|
@ -16719,8 +16739,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
if (addTest) {
|
||||
// Look pass the truncate if the high bits are known zero.
|
||||
if (isTruncWithZeroHighBitsInput(Cond, DAG))
|
||||
Cond = Cond.getOperand(0);
|
||||
Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG);
|
||||
|
||||
// We know the result of AND is compared against zero. Try to match
|
||||
// it to BT.
|
||||
|
@ -17980,7 +17999,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
|
|||
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
|
||||
SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8);
|
||||
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i1, CC, Test);
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
|
||||
|
@ -20494,10 +20513,15 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
|
|||
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
|
||||
|
||||
SDValue SetCC =
|
||||
DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
|
||||
DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
|
||||
DAG.getConstant(X86::COND_O, DL, MVT::i32),
|
||||
SDValue(Sum.getNode(), 2));
|
||||
|
||||
if (N->getValueType(1) == MVT::i1) {
|
||||
SetCC = DAG.getNode(ISD::AssertZext, DL, MVT::i8, SetCC,
|
||||
DAG.getValueType(MVT::i1));
|
||||
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
|
||||
}
|
||||
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
|
||||
}
|
||||
}
|
||||
|
@ -20507,10 +20531,15 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
|
|||
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
|
||||
|
||||
SDValue SetCC =
|
||||
DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
|
||||
DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
|
||||
DAG.getConstant(Cond, DL, MVT::i32),
|
||||
SDValue(Sum.getNode(), 1));
|
||||
|
||||
|
||||
if (N->getValueType(1) == MVT::i1) {
|
||||
SetCC = DAG.getNode(ISD::AssertZext, DL, MVT::i8, SetCC,
|
||||
DAG.getValueType(MVT::i1));
|
||||
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
|
||||
}
|
||||
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
|
||||
}
|
||||
|
||||
|
@ -26870,6 +26899,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
|
|||
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
|
||||
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
|
||||
SetCC.getOpcode() == ISD::TRUNCATE ||
|
||||
SetCC.getOpcode() == ISD::AssertZext ||
|
||||
SetCC.getOpcode() == ISD::AND) {
|
||||
if (SetCC.getOpcode() == ISD::AND) {
|
||||
int OpIdx = -1;
|
||||
|
|
|
@ -2083,51 +2083,65 @@ let Predicates = [HasBWI] in {
|
|||
(KMOVQkm addr:$src)>;
|
||||
}
|
||||
|
||||
def assertzext_i1 : PatFrag<(ops node:$src), (assertzext node:$src), [{
|
||||
return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
|
||||
}]>;
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(i1 (trunc (i64 GR64:$src))),
|
||||
(COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
|
||||
(i32 1))), VK1)>;
|
||||
(COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG (AND64ri8 $src, (i64 1)),
|
||||
sub_16bit)), VK1)>;
|
||||
|
||||
def : Pat<(i1 (trunc (i64 (assertzext_i1 GR64:$src)))),
|
||||
(COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
|
||||
|
||||
def : Pat<(i1 (trunc (i32 GR32:$src))),
|
||||
(COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
|
||||
(COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG (AND32ri8 $src, (i32 1)),
|
||||
sub_16bit)), VK1)>;
|
||||
|
||||
def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
|
||||
(COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
|
||||
|
||||
def : Pat<(i1 (trunc (i8 GR8:$src))),
|
||||
(COPY_TO_REGCLASS
|
||||
(KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))),
|
||||
VK1)>;
|
||||
(COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri8 $src, (i8 1)),
|
||||
sub_8bit)), VK1)>;
|
||||
|
||||
def : Pat<(i1 (trunc (i8 (assertzext_i1 GR8:$src)))),
|
||||
(COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), $src, sub_8bit)), VK1)>;
|
||||
|
||||
def : Pat<(i1 (trunc (i16 GR16:$src))),
|
||||
(COPY_TO_REGCLASS
|
||||
(KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))),
|
||||
VK1)>;
|
||||
(COPY_TO_REGCLASS (i16 (AND16ri8 $src, (i16 1))), VK1)>;
|
||||
|
||||
def : Pat<(i1 (trunc (i16 (assertzext_i1 GR16:$src)))),
|
||||
(COPY_TO_REGCLASS $src, VK1)>;
|
||||
|
||||
def : Pat<(i32 (zext VK1:$src)),
|
||||
(AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
|
||||
(i32 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
|
||||
sub_16bit))>;
|
||||
|
||||
def : Pat<(i32 (anyext VK1:$src)),
|
||||
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>;
|
||||
(i32 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
|
||||
sub_16bit))>;
|
||||
|
||||
def : Pat<(i8 (zext VK1:$src)),
|
||||
(EXTRACT_SUBREG
|
||||
(AND32ri8 (KMOVWrk
|
||||
(COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
|
||||
(i8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS VK1:$src, GR16)), sub_8bit))>;
|
||||
|
||||
def : Pat<(i8 (anyext VK1:$src)),
|
||||
(EXTRACT_SUBREG
|
||||
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>;
|
||||
(i8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS $src, GR16)), sub_8bit))>;
|
||||
|
||||
def : Pat<(i64 (zext VK1:$src)),
|
||||
(AND64ri8 (SUBREG_TO_REG (i64 0),
|
||||
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
|
||||
(i64 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
|
||||
sub_16bit))>;
|
||||
|
||||
def : Pat<(i64 (anyext VK1:$src)),
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit)>;
|
||||
(i64 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
|
||||
sub_16bit))>;
|
||||
|
||||
def : Pat<(i16 (zext VK1:$src)),
|
||||
(EXTRACT_SUBREG
|
||||
(AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
|
||||
sub_16bit)>;
|
||||
(COPY_TO_REGCLASS $src, GR16)>;
|
||||
|
||||
def : Pat<(i16 (anyext VK1:$src)),
|
||||
(EXTRACT_SUBREG
|
||||
(KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)),
|
||||
sub_16bit)>;
|
||||
(i16 (COPY_TO_REGCLASS $src, GR16))>;
|
||||
}
|
||||
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
|
||||
(COPY_TO_REGCLASS VK1:$src, VK16)>;
|
||||
|
|
|
@ -163,12 +163,10 @@ define i32 @test10(i64 %b, i64 %c, i1 %d) {
|
|||
; ALL-NEXT: kmovw %edx, %k0
|
||||
; ALL-NEXT: cmpq %rsi, %rdi
|
||||
; ALL-NEXT: sete %al
|
||||
; ALL-NEXT: andl $1, %eax
|
||||
; ALL-NEXT: kmovw %eax, %k1
|
||||
; ALL-NEXT: korw %k1, %k0, %k1
|
||||
; ALL-NEXT: kxorw %k1, %k0, %k0
|
||||
; ALL-NEXT: kmovw %k0, %eax
|
||||
; ALL-NEXT: andl $1, %eax
|
||||
; ALL-NEXT: testb %al, %al
|
||||
; ALL-NEXT: je LBB8_1
|
||||
; ALL-NEXT: ## BB#2: ## %if.end.i
|
||||
|
|
|
@ -1513,265 +1513,264 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
|||
; KNL-NEXT: vptestmd %zmm4, %zmm4, %k0
|
||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %edx
|
||||
; KNL-NEXT: kmovw %k1, %ecx
|
||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %ecx
|
||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %edi
|
||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %esi
|
||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r13d
|
||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r8d
|
||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r10d
|
||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r11d
|
||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %ebx
|
||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %ebp
|
||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r14d
|
||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r15d
|
||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r9d
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r12d
|
||||
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k1
|
||||
; KNL-NEXT: kshiftlw $0, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vmovd %eax, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: kshiftlw $14, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $1, %edx, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; KNL-NEXT: kshiftlw $15, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $2, %ecx, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: kshiftlw $13, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $3, %edi, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %edi
|
||||
; KNL-NEXT: kshiftlw $12, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $4, %esi, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %esi
|
||||
; KNL-NEXT: kshiftlw $11, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $5, %r13d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r13d
|
||||
; KNL-NEXT: kshiftlw $10, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $6, %r8d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r8d
|
||||
; KNL-NEXT: kshiftlw $9, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $7, %r10d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r10d
|
||||
; KNL-NEXT: kshiftlw $8, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $8, %r11d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r11d
|
||||
; KNL-NEXT: kshiftlw $7, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $9, %ebx, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %ebx
|
||||
; KNL-NEXT: kshiftlw $6, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $10, %ebp, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %ebp
|
||||
; KNL-NEXT: kshiftlw $5, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $11, %r14d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r14d
|
||||
; KNL-NEXT: kshiftlw $4, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $12, %r15d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r15d
|
||||
; KNL-NEXT: kshiftlw $3, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $13, %r9d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; KNL-NEXT: kshiftlw $2, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $14, %r12d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r12d
|
||||
; KNL-NEXT: kshiftlw $1, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r9d
|
||||
; KNL-NEXT: vptestmd %zmm6, %zmm6, %k0
|
||||
; KNL-NEXT: kshiftlw $0, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vmovd %ecx, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %edx
|
||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $2, %edi, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $3, %esi, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %edi
|
||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %ecx
|
||||
; KNL-NEXT: kmovw %k1, %edx
|
||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $5, %r8d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %r8d
|
||||
; KNL-NEXT: kmovw %k1, %r13d
|
||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $6, %r10d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %r13d
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $7, %r11d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %esi
|
||||
; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $8, %ebx, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %ebx
|
||||
; KNL-NEXT: kmovw %k1, %edi
|
||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $9, %ebp, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %ebp
|
||||
; KNL-NEXT: kmovw %k1, %r8d
|
||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $10, %r14d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %r10d
|
||||
; KNL-NEXT: kmovw %k1, %r9d
|
||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $11, %r15d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %r11d
|
||||
; KNL-NEXT: kmovw %k1, %r10d
|
||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
|
||||
; KNL-NEXT: kmovw %k1, %esi
|
||||
; KNL-NEXT: kmovw %k1, %r11d
|
||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $13, %r12d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %r14d
|
||||
; KNL-NEXT: kmovw %k1, %ebx
|
||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $14, %r9d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %r9d
|
||||
; KNL-NEXT: kmovw %k1, %ebp
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $15, %edx, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k1, %r15d
|
||||
; KNL-NEXT: vptestmd %zmm7, %zmm7, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r14d
|
||||
; KNL-NEXT: vptestmd %zmm5, %zmm5, %k2
|
||||
; KNL-NEXT: kshiftlw $0, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vmovd %eax, %xmm6
|
||||
; KNL-NEXT: vmovd %r15d, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r15d
|
||||
; KNL-NEXT: kshiftlw $14, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $1, %ecx, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: kshiftlw $15, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $2, %r12d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: kshiftlw $13, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $3, %edx, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r12d
|
||||
; KNL-NEXT: kshiftlw $12, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $4, %r13d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: kshiftlw $11, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
|
||||
; KNL-NEXT: kmovw %k0, %r13d
|
||||
; KNL-NEXT: kshiftlw $10, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $6, %esi, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %esi
|
||||
; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; KNL-NEXT: kshiftlw $9, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $7, %edi, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %esi
|
||||
; KNL-NEXT: kshiftlw $8, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $8, %r8d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %edi
|
||||
; KNL-NEXT: kshiftlw $7, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $9, %r9d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r8d
|
||||
; KNL-NEXT: kshiftlw $6, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $10, %r10d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r9d
|
||||
; KNL-NEXT: kshiftlw $5, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $11, %r11d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r10d
|
||||
; KNL-NEXT: kshiftlw $4, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $12, %ebx, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %ebx
|
||||
; KNL-NEXT: kshiftlw $3, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $13, %ebp, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %ebp
|
||||
; KNL-NEXT: kshiftlw $2, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $14, %r14d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r11d
|
||||
; KNL-NEXT: kshiftlw $1, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $15, %r15d, %xmm4, %xmm4
|
||||
; KNL-NEXT: kmovw %k0, %r14d
|
||||
; KNL-NEXT: vptestmd %zmm6, %zmm6, %k1
|
||||
; KNL-NEXT: kshiftlw $0, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vmovd %eax, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %r15d
|
||||
; KNL-NEXT: kshiftlw $14, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: vpinsrb $1, %ecx, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: kshiftlw $15, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $2, %edi, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: vpinsrb $2, %r12d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: kshiftlw $13, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $3, %ecx, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: vpinsrb $3, %edx, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %r12d
|
||||
; KNL-NEXT: kshiftlw $12, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $4, %r8d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %r8d
|
||||
; KNL-NEXT: vpinsrb $4, %r13d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: kshiftlw $11, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $5, %r13d, %xmm6, %xmm6
|
||||
; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
|
||||
; KNL-NEXT: kmovw %k0, %r13d
|
||||
; KNL-NEXT: kshiftlw $10, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
|
||||
; KNL-NEXT: kmovw %k0, %edi
|
||||
; KNL-NEXT: vpinsrb $6, %esi, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %esi
|
||||
; KNL-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
|
||||
; KNL-NEXT: kshiftlw $9, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $7, %ebx, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %ebx
|
||||
; KNL-NEXT: vpinsrb $7, %edi, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %esi
|
||||
; KNL-NEXT: kshiftlw $8, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $8, %ebp, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %ebp
|
||||
; KNL-NEXT: vpinsrb $8, %r8d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %edi
|
||||
; KNL-NEXT: kshiftlw $7, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $9, %r10d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %r10d
|
||||
; KNL-NEXT: vpinsrb $9, %r9d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %r8d
|
||||
; KNL-NEXT: kshiftlw $6, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $10, %r11d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %r11d
|
||||
; KNL-NEXT: vpinsrb $10, %r10d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %r9d
|
||||
; KNL-NEXT: kshiftlw $5, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $11, %esi, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %esi
|
||||
; KNL-NEXT: vpinsrb $11, %ebx, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %ebx
|
||||
; KNL-NEXT: kshiftlw $4, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $12, %r14d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %r14d
|
||||
; KNL-NEXT: vpinsrb $12, %ebp, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %ebp
|
||||
; KNL-NEXT: kshiftlw $3, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $13, %r9d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %r9d
|
||||
; KNL-NEXT: vpinsrb $13, %r11d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %r10d
|
||||
; KNL-NEXT: kshiftlw $2, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $14, %r15d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %r15d
|
||||
; KNL-NEXT: vpinsrb $14, %r14d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %r11d
|
||||
; KNL-NEXT: kshiftlw $1, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $15, %r12d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k0, %r12d
|
||||
; KNL-NEXT: kshiftlw $0, %k1, %k0
|
||||
; KNL-NEXT: vpinsrb $15, %r15d, %xmm5, %xmm5
|
||||
; KNL-NEXT: kmovw %k0, %r14d
|
||||
; KNL-NEXT: vptestmd %zmm7, %zmm7, %k0
|
||||
; KNL-NEXT: kshiftlw $0, %k1, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vmovd %eax, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r15d
|
||||
; KNL-NEXT: kshiftlw $14, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $1, %ecx, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %ecx
|
||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $2, %r12d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r12d
|
||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $3, %edx, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %edx
|
||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $4, %r13d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r13d
|
||||
; KNL-NEXT: kshiftlw $11, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: kshiftlw $10, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $6, %esi, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %esi
|
||||
; KNL-NEXT: kshiftlw $9, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $7, %edi, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %edi
|
||||
; KNL-NEXT: kshiftlw $8, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $8, %r8d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r8d
|
||||
; KNL-NEXT: kshiftlw $7, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $9, %r9d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r9d
|
||||
; KNL-NEXT: kshiftlw $6, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $10, %ebx, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %ebx
|
||||
; KNL-NEXT: kshiftlw $5, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $11, %ebp, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %ebp
|
||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r10d
|
||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $13, %r11d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r11d
|
||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $14, %r14d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r14d
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vpinsrb $15, %r15d, %xmm6, %xmm6
|
||||
; KNL-NEXT: kmovw %k1, %r15d
|
||||
; KNL-NEXT: kshiftlw $0, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vmovd %edx, %xmm7
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: vpinsrb $1, %eax, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $2, %ecx, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $3, %r8d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $4, %r13d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $5, %edi, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $6, %ebx, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $7, %ebp, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $8, %r10d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $9, %r11d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $10, %esi, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $11, %r14d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $12, %r9d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $13, %r15d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vmovd %r12d, %xmm7
|
||||
; KNL-NEXT: kmovw %k0, %r12d
|
||||
; KNL-NEXT: vpinsrb $1, %ecx, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $2, %edx, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $3, %r13d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $4, %eax, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $5, %esi, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $6, %edi, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $7, %r8d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $8, %r9d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $9, %ebx, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $10, %ebp, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $11, %r10d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $12, %r11d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpinsrb $13, %r14d, %xmm7, %xmm7
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
|
||||
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
|
||||
|
@ -1784,8 +1783,8 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
|
|||
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
|
||||
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
|
||||
; KNL-NEXT: vpand %ymm2, %ymm4, %ymm2
|
||||
; KNL-NEXT: vpinsrb $14, %r12d, %xmm7, %xmm4
|
||||
; KNL-NEXT: vpinsrb $15, %edx, %xmm4, %xmm4
|
||||
; KNL-NEXT: vpinsrb $14, %r15d, %xmm7, %xmm4
|
||||
; KNL-NEXT: vpinsrb $15, %r12d, %xmm4, %xmm4
|
||||
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
|
||||
; KNL-NEXT: vpsllw $15, %ymm4, %ymm4
|
||||
; KNL-NEXT: vpsraw $15, %ymm4, %ymm4
|
||||
|
|
|
@ -159,7 +159,6 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
|
|||
;CHECK-LABEL: test13
|
||||
;CHECK: cmpl %esi, %edi
|
||||
;CHECK: setb %al
|
||||
;CHECK: andl $1, %eax
|
||||
;CHECK: kmovw %eax, %k0
|
||||
;CHECK: movw $-4
|
||||
;CHECK: korw
|
||||
|
|
|
@ -9,9 +9,7 @@ define i32 @test_kortestz(i16 %a0, i16 %a1) {
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: kortestw %k0, %k1
|
||||
; CHECK-NEXT: sete %al
|
||||
; CHECK-NEXT: kmovw %eax, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: movzbl %al, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
|
||||
ret i32 %res
|
||||
|
@ -5091,7 +5089,6 @@ define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vcmpnltsd {sae}, %xmm1, %xmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
%res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
|
||||
|
@ -5112,7 +5109,6 @@ define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1
|
|||
; CHECK-NEXT: kandw %k2, %k1, %k1
|
||||
; CHECK-NEXT: korw %k1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
%res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
|
||||
|
@ -5135,7 +5131,6 @@ define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vcmpunordss %xmm1, %xmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: retq
|
||||
|
||||
%res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
|
||||
|
@ -5153,9 +5148,8 @@ define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1,
|
|||
; CHECK-NEXT: vcmpneqss %xmm1, %xmm0, %k2 {%k1}
|
||||
; CHECK-NEXT: kmovw %k2, %ecx
|
||||
; CHECK-NEXT: vcmpnltss {sae}, %xmm1, %xmm0, %k1 {%k1}
|
||||
; CHECK-NEXT: kmovw %k1, %edx
|
||||
; CHECK-NEXT: andl $1, %edx
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: kmovw %k1, %eax
|
||||
; CHECK-NEXT: kmovw %k0, %edx
|
||||
; CHECK-NEXT: andb %cl, %al
|
||||
; CHECK-NEXT: andb %dl, %al
|
||||
; CHECK-NEXT: retq
|
||||
|
|
|
@ -173,18 +173,35 @@ define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
|
|||
; CHECK-NEXT: kshiftlw $10, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $15, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cmp_res = icmp ugt <16 x i32> %a, %b
|
||||
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
|
||||
%res = zext i1 %cmp_res.i1 to i32
|
||||
ret i32 %res
|
||||
}define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
|
||||
}
|
||||
|
||||
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: zext_test2:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kshiftlw $10, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $15, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cmp_res = icmp ugt <16 x i32> %a, %b
|
||||
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
|
||||
%res = zext i1 %cmp_res.i1 to i16
|
||||
ret i16 %res
|
||||
}define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
|
||||
}
|
||||
|
||||
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
|
||||
; CHECK-LABEL: zext_test3:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
|
||||
; CHECK-NEXT: kshiftlw $10, %k0, %k0
|
||||
; CHECK-NEXT: kshiftrw $15, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: retq
|
||||
%cmp_res = icmp ugt <16 x i32> %a, %b
|
||||
%cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
|
||||
%res = zext i1 %cmp_res.i1 to i8
|
||||
|
@ -579,7 +596,6 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
|
|||
; SKX-NEXT: kmovq %rdi, %k0
|
||||
; SKX-NEXT: cmpl %edx, %esi
|
||||
; SKX-NEXT: setg %al
|
||||
; SKX-NEXT: andl $1, %eax
|
||||
; SKX-NEXT: kmovw %eax, %k1
|
||||
; SKX-NEXT: kshiftlq $5, %k1, %k1
|
||||
; SKX-NEXT: korq %k1, %k0, %k0
|
||||
|
@ -1623,10 +1639,10 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: kmovw %k1, %r8d
|
||||
; KNL-NEXT: kshiftlw $15, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r10d
|
||||
; KNL-NEXT: kmovw %k1, %r9d
|
||||
; KNL-NEXT: kshiftlw $13, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r9d
|
||||
; KNL-NEXT: kmovw %k1, %r10d
|
||||
; KNL-NEXT: kshiftlw $12, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %r11d
|
||||
|
@ -1653,22 +1669,22 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: kshiftlw $4, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %edx
|
||||
; KNL-NEXT: kmovw %k1, %ecx
|
||||
; KNL-NEXT: kshiftlw $3, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %ecx
|
||||
; KNL-NEXT: kmovw %k1, %edx
|
||||
; KNL-NEXT: kshiftlw $2, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: kmovw %k1, %esi
|
||||
; KNL-NEXT: kshiftlw $1, %k0, %k1
|
||||
; KNL-NEXT: kshiftrw $15, %k1, %k1
|
||||
; KNL-NEXT: vmovd %r10d, %xmm3
|
||||
; KNL-NEXT: kmovw %k1, %r10d
|
||||
; KNL-NEXT: vmovd %r9d, %xmm3
|
||||
; KNL-NEXT: kmovw %k1, %r9d
|
||||
; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2
|
||||
; KNL-NEXT: kshiftlw $0, %k0, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2
|
||||
; KNL-NEXT: vpinsrb $2, %r9d, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2
|
||||
|
@ -1677,10 +1693,10 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $11, %edx, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $12, %ecx, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $14, %r10d, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
|
||||
; KNL-NEXT: vpmovsxbd %xmm2, %zmm2
|
||||
|
@ -1713,7 +1729,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: kmovw %k0, %r13d
|
||||
; KNL-NEXT: kshiftlw $7, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: kshiftlw $6, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %esi
|
||||
|
@ -1728,7 +1744,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: kshiftlw $2, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: kshiftlw $1, %k2, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vmovd %r10d, %xmm2
|
||||
|
@ -1743,12 +1759,12 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $8, %edx, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
|
||||
; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
|
||||
|
@ -1782,7 +1798,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: kmovw %k0, %r13d
|
||||
; KNL-NEXT: kshiftlw $7, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: kshiftlw $6, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %esi
|
||||
|
@ -1797,7 +1813,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: kshiftlw $2, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: kmovw %k0, %edx
|
||||
; KNL-NEXT: kmovw %k0, %ecx
|
||||
; KNL-NEXT: kshiftlw $1, %k1, %k0
|
||||
; KNL-NEXT: kshiftrw $15, %k0, %k0
|
||||
; KNL-NEXT: vmovd %r10d, %xmm1
|
||||
|
@ -1812,12 +1828,12 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
|
|||
; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
|
||||
; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0
|
||||
; KNL-NEXT: kmovw %k1, %eax
|
||||
; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
|
||||
|
|
|
@ -490,7 +490,6 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je LBB28_2
|
||||
; CHECK-NEXT: ## BB#1:
|
||||
|
@ -498,7 +497,6 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0, i8 %x1) {
|
|||
; CHECK-NEXT: LBB28_2:
|
||||
; CHECK-NEXT: vfpclasssd $4, %xmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %ecx
|
||||
; CHECK-NEXT: andl $1, %ecx
|
||||
; CHECK-NEXT: testb %cl, %cl
|
||||
; CHECK-NEXT: je LBB28_4
|
||||
; CHECK-NEXT: ## BB#3:
|
||||
|
@ -521,7 +519,6 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
|
|||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK-NEXT: vfpclassss $4, %xmm0, %k0 {%k1}
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
; CHECK-NEXT: andl $1, %eax
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: je LBB29_2
|
||||
; CHECK-NEXT: ## BB#1:
|
||||
|
@ -529,7 +526,6 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0, i8 %x1) {
|
|||
; CHECK-NEXT: LBB29_2:
|
||||
; CHECK-NEXT: vfpclassss $4, %xmm0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %ecx
|
||||
; CHECK-NEXT: andl $1, %ecx
|
||||
; CHECK-NEXT: testb %cl, %cl
|
||||
; CHECK-NEXT: je LBB29_4
|
||||
; CHECK-NEXT: ## BB#3:
|
||||
|
|
|
@ -1367,12 +1367,9 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; KNL_64-LABEL: test30:
|
||||
; KNL_64: # BB#0:
|
||||
; KNL_64-NEXT: andl $1, %edx
|
||||
; KNL_64-NEXT: kmovw %edx, %k1
|
||||
; KNL_64-NEXT: andl $1, %esi
|
||||
; KNL_64-NEXT: kmovw %esi, %k2
|
||||
; KNL_64-NEXT: movl %edi, %eax
|
||||
; KNL_64-NEXT: andl $1, %eax
|
||||
; KNL_64-NEXT: kmovw %eax, %k0
|
||||
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
|
||||
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
|
||||
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
|
||||
|
@ -1380,81 +1377,76 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; KNL_64-NEXT: testb $1, %dil
|
||||
; KNL_64-NEXT: je .LBB29_2
|
||||
; KNL_64-NEXT: # BB#1: # %cond.load
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rax
|
||||
; KNL_64-NEXT: vmovd (%rax), %xmm0
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rcx
|
||||
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; KNL_64-NEXT: .LBB29_2: # %else
|
||||
; KNL_64-NEXT: kmovw %k2, %eax
|
||||
; KNL_64-NEXT: movl %eax, %ecx
|
||||
; KNL_64-NEXT: andl $1, %ecx
|
||||
; KNL_64-NEXT: testb %cl, %cl
|
||||
; KNL_64-NEXT: testb %sil, %sil
|
||||
; KNL_64-NEXT: je .LBB29_4
|
||||
; KNL_64-NEXT: # BB#3: # %cond.load1
|
||||
; KNL_64-NEXT: vpextrq $1, %xmm1, %rcx
|
||||
; KNL_64-NEXT: vpinsrd $1, (%rcx), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: .LBB29_4: # %else2
|
||||
; KNL_64-NEXT: kmovw %k1, %ecx
|
||||
; KNL_64-NEXT: movl %ecx, %edx
|
||||
; KNL_64-NEXT: andl $1, %edx
|
||||
; KNL_64-NEXT: testb %dl, %dl
|
||||
; KNL_64-NEXT: je .LBB29_6
|
||||
; KNL_64-NEXT: # BB#5: # %cond.load4
|
||||
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rdx
|
||||
; KNL_64-NEXT: vpinsrd $2, (%rdx), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: vmovq %xmm1, %rcx
|
||||
; KNL_64-NEXT: vpinsrd $2, (%rcx), %xmm0, %xmm0
|
||||
; KNL_64-NEXT: .LBB29_6: # %else5
|
||||
; KNL_64-NEXT: kmovw %k0, %edx
|
||||
; KNL_64-NEXT: vmovd %edx, %xmm1
|
||||
; KNL_64-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vmovd %eax, %xmm1
|
||||
; KNL_64-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
|
||||
; KNL_64-NEXT: retq
|
||||
;
|
||||
; KNL_32-LABEL: test30:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: pushl %ebx
|
||||
; KNL_32-NEXT: .Ltmp0:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8
|
||||
; KNL_32-NEXT: pushl %esi
|
||||
; KNL_32-NEXT: .Ltmp1:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 12
|
||||
; KNL_32-NEXT: .Ltmp2:
|
||||
; KNL_32-NEXT: .cfi_offset %esi, -12
|
||||
; KNL_32-NEXT: .Ltmp3:
|
||||
; KNL_32-NEXT: .cfi_offset %ebx, -8
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: andl $1, %eax
|
||||
; KNL_32-NEXT: kmovw %eax, %k1
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: andl $1, %eax
|
||||
; KNL_32-NEXT: kmovw %eax, %k2
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; KNL_32-NEXT: movl %eax, %ecx
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||
; KNL_32-NEXT: andl $1, %ecx
|
||||
; KNL_32-NEXT: kmovw %ecx, %k0
|
||||
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ebx
|
||||
; KNL_32-NEXT: movl %ebx, %edx
|
||||
; KNL_32-NEXT: andl $1, %edx
|
||||
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
|
||||
; KNL_32-NEXT: # implicit-def: %XMM0
|
||||
; KNL_32-NEXT: testb $1, %al
|
||||
; KNL_32-NEXT: testb $1, %bl
|
||||
; KNL_32-NEXT: je .LBB29_2
|
||||
; KNL_32-NEXT: # BB#1: # %cond.load
|
||||
; KNL_32-NEXT: vmovd %xmm1, %eax
|
||||
; KNL_32-NEXT: vmovd (%eax), %xmm0
|
||||
; KNL_32-NEXT: vmovd %xmm1, %esi
|
||||
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; KNL_32-NEXT: .LBB29_2: # %else
|
||||
; KNL_32-NEXT: kmovw %k2, %eax
|
||||
; KNL_32-NEXT: movl %eax, %ecx
|
||||
; KNL_32-NEXT: andl $1, %ecx
|
||||
; KNL_32-NEXT: testb %cl, %cl
|
||||
; KNL_32-NEXT: je .LBB29_4
|
||||
; KNL_32-NEXT: # BB#3: # %cond.load1
|
||||
; KNL_32-NEXT: vpextrd $1, %xmm1, %ecx
|
||||
; KNL_32-NEXT: vpinsrd $1, (%ecx), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: vpextrd $1, %xmm1, %esi
|
||||
; KNL_32-NEXT: vpinsrd $1, (%esi), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: .LBB29_4: # %else2
|
||||
; KNL_32-NEXT: kmovw %k1, %ecx
|
||||
; KNL_32-NEXT: movl %ecx, %edx
|
||||
; KNL_32-NEXT: andl $1, %edx
|
||||
; KNL_32-NEXT: testb %dl, %dl
|
||||
; KNL_32-NEXT: testb %al, %al
|
||||
; KNL_32-NEXT: je .LBB29_6
|
||||
; KNL_32-NEXT: # BB#5: # %cond.load4
|
||||
; KNL_32-NEXT: vpextrd $2, %xmm1, %edx
|
||||
; KNL_32-NEXT: vpinsrd $2, (%edx), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: vpextrd $2, %xmm1, %esi
|
||||
; KNL_32-NEXT: vpinsrd $2, (%esi), %xmm0, %xmm0
|
||||
; KNL_32-NEXT: .LBB29_6: # %else5
|
||||
; KNL_32-NEXT: kmovw %k0, %edx
|
||||
; KNL_32-NEXT: vmovd %edx, %xmm1
|
||||
; KNL_32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
|
||||
; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
|
||||
; KNL_32-NEXT: popl %esi
|
||||
; KNL_32-NEXT: popl %ebx
|
||||
; KNL_32-NEXT: retl
|
||||
;
|
||||
; SKX-LABEL: test30:
|
||||
|
@ -1471,7 +1463,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
|
|||
; SKX-NEXT: je .LBB29_2
|
||||
; SKX-NEXT: # BB#1: # %cond.load
|
||||
; SKX-NEXT: vmovq %xmm1, %rax
|
||||
; SKX-NEXT: vmovd (%rax), %xmm0
|
||||
; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||
; SKX-NEXT: .LBB29_2: # %else
|
||||
; SKX-NEXT: kmovb %k1, -{{[0-9]+}}(%rsp)
|
||||
; SKX-NEXT: movb -{{[0-9]+}}(%rsp), %al
|
||||
|
@ -1645,12 +1637,12 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i
|
|||
; KNL_32-LABEL: test_gather_16i64:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: pushl %ebp
|
||||
; KNL_32-NEXT: .Ltmp0:
|
||||
; KNL_32-NEXT: .Ltmp4:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8
|
||||
; KNL_32-NEXT: .Ltmp1:
|
||||
; KNL_32-NEXT: .Ltmp5:
|
||||
; KNL_32-NEXT: .cfi_offset %ebp, -8
|
||||
; KNL_32-NEXT: movl %esp, %ebp
|
||||
; KNL_32-NEXT: .Ltmp2:
|
||||
; KNL_32-NEXT: .Ltmp6:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; KNL_32-NEXT: andl $-64, %esp
|
||||
; KNL_32-NEXT: subl $64, %esp
|
||||
|
@ -1768,12 +1760,12 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
|
|||
; KNL_32-LABEL: test_gather_16f64:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: pushl %ebp
|
||||
; KNL_32-NEXT: .Ltmp3:
|
||||
; KNL_32-NEXT: .Ltmp7:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8
|
||||
; KNL_32-NEXT: .Ltmp4:
|
||||
; KNL_32-NEXT: .Ltmp8:
|
||||
; KNL_32-NEXT: .cfi_offset %ebp, -8
|
||||
; KNL_32-NEXT: movl %esp, %ebp
|
||||
; KNL_32-NEXT: .Ltmp5:
|
||||
; KNL_32-NEXT: .Ltmp9:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; KNL_32-NEXT: andl $-64, %esp
|
||||
; KNL_32-NEXT: subl $64, %esp
|
||||
|
@ -1885,12 +1877,12 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
|
|||
; KNL_32-LABEL: test_scatter_16i64:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: pushl %ebp
|
||||
; KNL_32-NEXT: .Ltmp6:
|
||||
; KNL_32-NEXT: .Ltmp10:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8
|
||||
; KNL_32-NEXT: .Ltmp7:
|
||||
; KNL_32-NEXT: .Ltmp11:
|
||||
; KNL_32-NEXT: .cfi_offset %ebp, -8
|
||||
; KNL_32-NEXT: movl %esp, %ebp
|
||||
; KNL_32-NEXT: .Ltmp8:
|
||||
; KNL_32-NEXT: .Ltmp12:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; KNL_32-NEXT: andl $-64, %esp
|
||||
; KNL_32-NEXT: subl $64, %esp
|
||||
|
@ -1999,12 +1991,12 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
|
|||
; KNL_32-LABEL: test_scatter_16f64:
|
||||
; KNL_32: # BB#0:
|
||||
; KNL_32-NEXT: pushl %ebp
|
||||
; KNL_32-NEXT: .Ltmp9:
|
||||
; KNL_32-NEXT: .Ltmp13:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_offset 8
|
||||
; KNL_32-NEXT: .Ltmp10:
|
||||
; KNL_32-NEXT: .Ltmp14:
|
||||
; KNL_32-NEXT: .cfi_offset %ebp, -8
|
||||
; KNL_32-NEXT: movl %esp, %ebp
|
||||
; KNL_32-NEXT: .Ltmp11:
|
||||
; KNL_32-NEXT: .Ltmp15:
|
||||
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
|
||||
; KNL_32-NEXT: andl $-64, %esp
|
||||
; KNL_32-NEXT: subl $64, %esp
|
||||
|
|
|
@ -3,39 +3,48 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
|||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @test1(i32 %x) #0 {
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: setne %al
|
||||
; CHECK-NEXT: movb %al, %cl
|
||||
; CHECK-NEXT: kmovw %ecx, %k0
|
||||
; CHECK-NEXT: kmovb %k0, %eax
|
||||
; CHECK-NEXT: andb $1, %al
|
||||
; CHECK-NEXT: movzbl %al, %edi
|
||||
; CHECK-NEXT: callq callee1
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%tobool = icmp ne i32 %x, 0
|
||||
call void @callee1(i1 zeroext %tobool)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: cmpl $0, %edi
|
||||
; CHECK-NEXT: setne %al
|
||||
; CHECK-NEXT: andb $1, %al
|
||||
; CHECK-NEXT: movzbl %al, %edi
|
||||
; CHECK-NEXT: callq callee1
|
||||
|
||||
define void @test2(i32 %x) #0 {
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: # BB#0: # %entry
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: testl %edi, %edi
|
||||
; CHECK-NEXT: setne %al
|
||||
; CHECK-NEXT: movb %al, %cl
|
||||
; CHECK-NEXT: kmovw %ecx, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %ecx
|
||||
; CHECK-NEXT: movb %cl, %al
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: movl $-1, %edx
|
||||
; CHECK-NEXT: cmovnel %edx, %edi
|
||||
; CHECK-NEXT: callq callee2
|
||||
; CHECK-NEXT: popq %rax
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%tobool = icmp ne i32 %x, 0
|
||||
call void @callee2(i1 signext %tobool)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: cmpl $0, %edi
|
||||
; CHECK-NEXT: setne %al
|
||||
; CHECK-NEXT: kmovb %eax, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %edi
|
||||
; CHECK-NEXT: andl $1, %edi
|
||||
; CHECK-NEXT: movb %dil, %al
|
||||
; CHECK-NEXT: xorl %edi, %edi
|
||||
; CHECK-NEXT: testb %al, %al
|
||||
; CHECK-NEXT: movl $-1, %ecx
|
||||
; CHECK-NEXT: cmovnel %ecx, %edi
|
||||
; CHECK-NEXT: callq callee2
|
||||
|
||||
declare void @callee1(i1 zeroext)
|
||||
declare void @callee2(i1 signext)
|
||||
|
||||
|
|
|
@ -5,12 +5,12 @@ target triple = "x86_64-unknown-linux-gnu"
|
|||
; Note that the kmovs should really *not* appear in the output, this is an
|
||||
; artifact of the current poor lowering. This is tracked by PR28175.
|
||||
|
||||
; CHECK-LABEL: @foo64
|
||||
; CHECK: kmov
|
||||
; CHECK: kmov
|
||||
; CHECK: orq $-2, %rax
|
||||
; CHECK: ret
|
||||
define i64 @foo64(i1 zeroext %i, i32 %j) #0 {
|
||||
; CHECK-LABEL: foo64:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: orq $-2, %rdi
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: retq
|
||||
br label %bb
|
||||
|
||||
bb:
|
||||
|
@ -22,12 +22,12 @@ end:
|
|||
ret i64 %v
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @foo16
|
||||
; CHECK: kmov
|
||||
; CHECK: kmov
|
||||
; CHECK: orl $65534, %eax
|
||||
; CHECK: retq
|
||||
define i16 @foo16(i1 zeroext %i, i32 %j) #0 {
|
||||
; CHECK-LABEL: foo16:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: orl $65534, %edi # imm = 0xFFFE
|
||||
; CHECK-NEXT: movl %edi, %eax
|
||||
; CHECK-NEXT: retq
|
||||
br label %bb
|
||||
|
||||
bb:
|
||||
|
|
|
@ -738,10 +738,10 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
|
|||
; KNL-LABEL: bug27873:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: andl $1, %esi
|
||||
; KNL-NEXT: kmovw %esi, %k0
|
||||
; KNL-NEXT: movl $160, %ecx
|
||||
; KNL-NEXT: movq %rdi, %rax
|
||||
; KNL-NEXT: mulq %rcx
|
||||
; KNL-NEXT: kmovw %esi, %k0
|
||||
; KNL-NEXT: seto %al
|
||||
; KNL-NEXT: kmovw %eax, %k1
|
||||
; KNL-NEXT: korw %k1, %k0, %k0
|
||||
|
|
Loading…
Reference in New Issue