AVX-512: fixed the algorithm for building vectors of i1 elements

Fixed extraction and insertion of an i1 element.
Loads of i1 and zero-extending loads (zextload) of i1 are now followed by "and $1, %reg" to prevent loading garbage bits.
Added a bunch of new tests.

llvm-svn: 237793
This commit is contained in:
Elena Demikhovsky 2015-05-20 14:32:03 +00:00
parent 69c6008e49
commit f61727d880
7 changed files with 281 additions and 90 deletions

View File

@ -1471,6 +1471,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
@ -1500,6 +1504,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
setOperationAction(ISD::AND, MVT::v8i32, Legal);
setOperationAction(ISD::OR, MVT::v8i32, Legal);
@ -5188,12 +5194,27 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
return NV;
}
/// Pack a constant BUILD_VECTOR of i1 elements into one integer constant.
///
/// Operand number N of \p Op supplies bit N of the result; undef operands are
/// skipped and therefore leave their bit as zero. The returned constant has an
/// integer type whose width is the vector's size in bits, but never narrower
/// than 8 bits.
static SDValue ConvertI1VectorToInterger(SDValue Op, SelectionDAG &DAG) {
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector");
const unsigned NumElts = Op.getNumOperands();
uint64_t Mask = 0;
for (unsigned Pos = 0; Pos < NumElts; ++Pos) {
SDValue Elt = Op.getOperand(Pos);
// Undef elements contribute nothing to the mask.
if (Elt.getOpcode() == ISD::UNDEF)
continue;
Mask |= cast<ConstantSDNode>(Elt)->getZExtValue() << Pos;
}
SDLoc DL(Op);
// Widen the scalar to at least 8 bits for vectors smaller than that.
const int VecBits = (int)Op.getValueType().getSizeInBits();
MVT ImmVT = MVT::getIntegerVT(std::max(VecBits, 8));
return DAG.getConstant(Mask, DL, ImmVT);
}
// Lower BUILD_VECTOR operation for vectors of i1 elements (vXi1 mask types).
SDValue
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
assert((VT.getVectorElementType() == MVT::i1) &&
"Unexpected type in LowerBUILD_VECTORvXi1!");
SDLoc dl(Op);
@ -5209,62 +5230,69 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
bool AllContants = true;
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
SDValue Imm = ConvertI1VectorToInterger(Op, DAG);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, dl, VT, Imm);
SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
// Vector has one or more non-const elements
uint64_t Immediate = 0;
int NonConstIdx = -1;
SmallVector<unsigned, 16> NonConstIdx;
bool IsSplat = true;
unsigned NumNonConsts = 0;
unsigned NumConsts = 0;
bool HasConstElts = false;
int SplatIdx = -1;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
continue;
if (!isa<ConstantSDNode>(In)) {
AllContants = false;
NonConstIdx = idx;
NumNonConsts++;
} else {
NumConsts++;
if (cast<ConstantSDNode>(In)->getZExtValue())
Immediate |= (1ULL << idx);
if (!isa<ConstantSDNode>(In))
NonConstIdx.push_back(idx);
else {
Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
HasConstElts = true;
}
if (In != Op.getOperand(0))
if (SplatIdx == -1)
SplatIdx = idx;
else if (In != Op.getOperand(SplatIdx))
IsSplat = false;
}
if (AllContants) {
SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
DAG.getConstant(Immediate, dl, MVT::i16));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
DAG.getIntPtrConstant(0, dl));
// for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
if (IsSplat)
return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx),
DAG.getConstant(1, dl, VT),
DAG.getConstant(0, dl, VT));
// insert elements one by one
SDValue DstVec;
SDValue Imm;
if (Immediate) {
MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
Imm = DAG.getConstant(Immediate, dl, ImmVT);
}
else if (HasConstElts)
Imm = DAG.getConstant(0, dl, VT);
else
Imm = DAG.getUNDEF(VT);
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm);
else {
SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
DAG.getIntPtrConstant(0, dl));
}
if (NumNonConsts == 1 && NonConstIdx != 0) {
SDValue DstVec;
if (NumConsts) {
SDValue VecAsImm = DAG.getConstant(Immediate, dl,
MVT::getIntegerVT(VT.getSizeInBits()));
DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
}
else
DstVec = DAG.getUNDEF(VT);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(NonConstIdx),
DAG.getIntPtrConstant(NonConstIdx, dl));
for (unsigned i = 0; i < NonConstIdx.size(); ++i) {
unsigned InsertIdx = NonConstIdx[i];
DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(InsertIdx),
DAG.getIntPtrConstant(InsertIdx, dl));
}
if (!IsSplat && (NonConstIdx != 0))
llvm_unreachable("Unsupported BUILD_VECTOR operation");
MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
SDValue Select;
if (IsSplat)
Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
DAG.getConstant(-1, dl, SelectVT),
DAG.getConstant(0, dl, SelectVT));
else
Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
DAG.getConstant((Immediate | 1), dl, SelectVT),
DAG.getConstant(Immediate, dl, SelectVT));
return DAG.getNode(ISD::BITCAST, dl, VT, Select);
return DstVec;
}
/// \brief Return true if \p N implements a horizontal binop and return the
@ -10670,15 +10698,11 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
if (IdxVal)
EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
if (Vec.getOpcode() == ISD::UNDEF)
return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
const TargetRegisterClass* rc = getRegClassFor(VecVT);
unsigned MaxSift = rc->getSize()*8 - 1;
EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
DAG.getConstant(MaxSift, dl, MVT::i8));
EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec,
DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
return EltInVec;
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
}
@ -13623,6 +13647,29 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
if (VT.isVector() && VT.getScalarType() == MVT::i1) {
SDValue Op1Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
Op1Scalar = ConvertI1VectorToInterger(Op1, DAG);
else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
Op1Scalar = Op1.getOperand(0);
SDValue Op2Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
Op2Scalar = ConvertI1VectorToInterger(Op2, DAG);
else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
Op2Scalar = Op2.getOperand(0);
if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
Op1Scalar.getValueType(),
Cond, Op1Scalar, Op2Scalar);
if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, VT, newSelect);
SDValue ExtVec = DAG.getNode(ISD::BITCAST, DL, MVT::v8i1, newSelect);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
DAG.getIntPtrConstant(0, DL));
}
}
if (VT == MVT::v4i1 || VT == MVT::v2i1) {
SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
@ -20728,7 +20775,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (!InVec.hasOneUse())
return SDValue();
EVT BCVT = InVec.getOperand(0).getValueType();
if (BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
if (!BCVT.isVector() ||
BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
return SDValue();
InVec = InVec.getOperand(0);
}
@ -20833,7 +20881,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
return NewOp;
SDValue InputVector = N->getOperand(0);
SDLoc dl(InputVector);
// Detect mmx to i32 conversion through a v2i32 elt extract.
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
N->getValueType(0) == MVT::i32 &&
@ -20858,6 +20906,18 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
MMXSrcOp.getOperand(0));
}
EVT VT = N->getValueType(0);
if (VT == MVT::i1 && dyn_cast<ConstantSDNode>(N->getOperand(1)) &&
InputVector.getOpcode() == ISD::BITCAST &&
dyn_cast<ConstantSDNode>(InputVector.getOperand(0))) {
uint64_t ExtractedElt =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
uint64_t InputValue =
cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
uint64_t Res = (InputValue >> ExtractedElt) & 1;
return DAG.getConstant(Res, dl, MVT::i1);
}
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (InputVector.getValueType() != MVT::v4i32)
@ -20903,7 +20963,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// otherwise bounce the vector off the cache.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vals[4];
SDLoc dl(InputVector);
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
@ -23606,6 +23665,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc dl(N);
// (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
// (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
@ -23613,7 +23673,6 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
// from AH (which we otherwise need to do contortions to access).
if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
N0.getValueType() == MVT::i8 && VT == MVT::i32) {
SDLoc dl(N);
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
N0.getOperand(0), N0.getOperand(1));
@ -23621,8 +23680,15 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
return R.getValue(1);
}
if (!DCI.isBeforeLegalizeOps())
if (!DCI.isBeforeLegalizeOps()) {
if (N0.getValueType() == MVT::i1) {
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue AllOnes =
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT);
return DAG.getNode(ISD::SELECT, dl, VT, N0, AllOnes, Zero);
}
return SDValue();
}
if (!Subtarget->hasFp256())
return SDValue();

View File

@ -1855,7 +1855,9 @@ let Predicates = [HasAVX512] in {
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
(KMOVWmk addr:$dst, VK16:$src)>;
def : Pat<(i1 (load addr:$src)),
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
(COPY_TO_REGCLASS (AND16ri (i16 (SUBREG_TO_REG (i32 0),
(MOV8rm addr:$src), sub_8bit)),
(i16 1)), VK1)>;
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
(KMOVWkm addr:$src)>;
}
@ -1920,13 +1922,13 @@ let Predicates = [HasAVX512, NoDQI] in {
// GR from/to 8-bit mask without native support
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
(COPY_TO_REGCLASS
(KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
VK8)>;
(KMOVWkr (MOVZX32rr8 GR8 :$src)), VK8)>;
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
(EXTRACT_SUBREG
(KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
sub_8bit)>;
}
let Predicates = [HasAVX512] in {
def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
(COPY_TO_REGCLASS VK16:$src, VK1)>;

View File

@ -1064,11 +1064,12 @@ defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
// zextload bool -> zextload byte
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>;
def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>;
def : Pat<(zextloadi64i1 addr:$src),
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
(SUBREG_TO_REG (i64 0),
(AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit

View File

@ -137,10 +137,12 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
}
;CHECK-LABEL: test13
;CHECK: cmpl
;CHECK: sbbl
;CHECK: orl $65532
;CHECK: ret
;CHECK: cmpl %esi, %edi
;CHECK: setb %al
;CHECK: andl $1, %eax
;CHECK: kmovw %eax, %k0
;CHECK: movw $-4
;CHECK: korw
define i16 @test13(i32 %a, i32 %b) {
%cmp_res = icmp ult i32 %a, %b
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
@ -167,19 +169,22 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
}
;CHECK-LABEL: test15
;CHECK: kshiftlw
;CHECK: kmovw
;CHECK: ret
;CHECK: movb (%rdi), %al
;CHECK: andb $1, %al
;CHECK: movw $-1, %ax
;CHECK: cmovew
define i16 @test15(i1 *%addr) {
%x = load i1 , i1 * %addr, align 128
%x = load i1 , i1 * %addr, align 1
%x1 = insertelement <16 x i1> undef, i1 %x, i32 10
%x2 = bitcast <16 x i1>%x1 to i16
ret i16 %x2
}
;CHECK-LABEL: test16
;CHECK: kshiftlw
;CHECK: kshiftrw
;CHECK: movb (%rdi), %al
;CHECK: andw $1, %ax
;CHECK: kmovw
;CHECK: kshiftlw $10
;CHECK: korw
;CHECK: ret
define i16 @test16(i1 *%addr, i16 %a) {
@ -191,11 +196,11 @@ define i16 @test16(i1 *%addr, i16 %a) {
}
;CHECK-LABEL: test17
;KNL: kshiftlw
;KNL: kshiftrw
;KNL: movb (%rdi), %al
;KNL: andw $1, %ax
;KNL: kshiftlw $4
;KNL: korw
;SKX: kshiftlb
;SKX: kshiftrb
;SKX: kshiftlb $4
;SKX: korb
;CHECK: ret
define i8 @test17(i1 *%addr, i8 %a) {

View File

@ -191,7 +191,7 @@ false:
; SKX-LABEL: test7
; SKX: vpmovw2m
; SKX: kmovw %eax, %k1
; SKX: kmovb %eax, %k1
; SKX: korb
define void @test7(<8 x i1> %mask) {
@ -282,3 +282,114 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
ret <4 x i1>%c
}
; KNL-LABEL: test12
; KNL: movl %edi, %eax
define i32 @test12(i32 %x, i32 %y) {
%a = bitcast i16 21845 to <16 x i1>
%b = extractelement <16 x i1> %a, i32 0
%c = select i1 %b, i32 %x, i32 %y
ret i32 %c
}
; KNL-LABEL: test13
; KNL: movl %esi, %eax
define i32 @test13(i32 %x, i32 %y) {
%a = bitcast i16 21845 to <16 x i1>
%b = extractelement <16 x i1> %a, i32 3
%c = select i1 %b, i32 %x, i32 %y
ret i32 %c
}
; SKX-LABEL: test14
; SKX: movb $11, %al
; SKX: kmovb %eax, %k0
; SKX: vpmovm2d %k0, %xmm0
define <4 x i1> @test14() {
%a = bitcast i16 21845 to <16 x i1>
%b = extractelement <16 x i1> %a, i32 2
%c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
ret <4 x i1> %c
}
; KNL-LABEL: test15
; KNL: cmovgw
define <16 x i1> @test15(i32 %x, i32 %y) {
%a = bitcast i16 21845 to <16 x i1>
%b = bitcast i16 1 to <16 x i1>
%mask = icmp sgt i32 %x, %y
%c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
ret <16 x i1> %c
}
; SKX-LABEL: test16
; SKX: kxnorw %k1, %k1, %k1
; SKX: kshiftrw $15, %k1, %k1
; SKX: kshiftlq $5, %k1, %k1
; SKX: korq %k1, %k0, %k0
; SKX: vpmovm2b %k0, %zmm0
define <64 x i8> @test16(i64 %x) {
%a = bitcast i64 %x to <64 x i1>
%b = insertelement <64 x i1>%a, i1 true, i32 5
%c = sext <64 x i1>%b to <64 x i8>
ret <64 x i8>%c
}
; SKX-LABEL: test17
; SKX: setg %al
; SKX: andl $1, %eax
; SKX: kmovw %eax, %k1
; SKX: kshiftlq $5, %k1, %k1
; SKX: korq %k1, %k0, %k0
; SKX: vpmovm2b %k0, %zmm0
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
%a = bitcast i64 %x to <64 x i1>
%b = icmp sgt i32 %y, %z
%c = insertelement <64 x i1>%a, i1 %b, i32 5
%d = sext <64 x i1>%c to <64 x i8>
ret <64 x i8>%d
}
; KNL-LABEL: test18
define <8 x i1> @test18(i8 %a, i16 %y) {
%b = bitcast i8 %a to <8 x i1>
%b1 = bitcast i16 %y to <16 x i1>
%el1 = extractelement <16 x i1>%b1, i32 8
%el2 = extractelement <16 x i1>%b1, i32 9
%c = insertelement <8 x i1>%b, i1 %el1, i32 7
%d = insertelement <8 x i1>%c, i1 %el2, i32 6
ret <8 x i1>%d
}
; KNL-LABEL: test19
; KNL: movzbl %dil, %eax
; KNL: kmovw %eax, %k0
; KNL: kshiftlw $13, %k0, %k0
; KNL: kshiftrw $15, %k0, %k0
; KNL: kmovw %k0, %eax
; KNL: andl $1, %eax
; KNL: testb %al, %al
define <8 x i1> @test19(i8 %a) {
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
ret <8 x i1> %c
}
; KNL-LABEL: test20
; KNL: movzbl %dil, %eax
; KNL: kmovw %eax, %k0
; KNL: kshiftlw $13, %k0, %k1
; KNL: kshiftrw $15, %k1, %k1
; KNL: kshiftlw $12, %k0, %k0
; KNL: kshiftrw $15, %k0, %k0
; KNL: kshiftlw $4, %k0, %k0
; KNL: kshiftlw $1, %k1, %k2
; KNL: korw %k0, %k2, %k0
; KNL: kshiftlw $6, %k1, %k1
; KNL: korw %k1, %k0, %k1
define <8 x i1> @test20(i8 %a, i16 %y) {
%b = bitcast i8 %a to <8 x i1>
%c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
ret <8 x i1> %c
}

View File

@ -50,8 +50,10 @@ define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
}
; CHECK-LABEL: select05
; CHECK: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK: movzbl %sil, %eax
; CHECK: kmovw %eax, %k0
; CHECK: movzbl %dil, %eax
; CHECK: kmovw %eax, %k1
; CHECK-NEXT: korw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
define i8 @select05(i8 %a.0, i8 %m) {
@ -63,8 +65,10 @@ define i8 @select05(i8 %a.0, i8 %m) {
}
; CHECK-LABEL: select06
; CHECK: kmovw %esi, %k0
; CHECK-NEXT: kmovw %edi, %k1
; CHECK: movzbl %sil, %eax
; CHECK: kmovw %eax, %k0
; CHECK: movzbl %dil, %eax
; CHECK: kmovw %eax, %k1
; CHECK-NEXT: kandw %k1, %k0, %k0
; CHECK-NEXT: kmovw %k0, %eax
define i8 @select06(i8 %a.0, i8 %m) {
@ -76,9 +80,12 @@ define i8 @select06(i8 %a.0, i8 %m) {
}
; CHECK-LABEL: select07
; CHECK-DAG: kmovw %edx, %k0
; CHECK-DAG: kmovw %edi, %k1
; CHECK-DAG: kmovw %esi, %k2
; CHECK-DAG: movzbl %dl, %eax
; CHECK-DAG: kmovw %eax, %k0
; CHECK-DAG: movzbl %dil, %eax
; CHECK-DAG: kmovw %eax, %k1
; CHECK-DAG: movzbl %sil, %eax
; CHECK-DAG: kmovw %eax, %k2
; CHECK: kandw %k0, %k1, %k1
; CHECK-NEXT: knotw %k0, %k0
; CHECK-NEXT: kandw %k0, %k2, %k0

View File

@ -156,10 +156,9 @@ define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
}
; CHECK-LABEL: trunc_i32_to_i1
; CHECK: testb
; CHECK: setne
; CKECK: orl
; CHECK: ret
; CHECK: movw $-4, %ax
; CHECK: kmovw %eax, %k1
; CHECK: korw
define i16 @trunc_i32_to_i1(i32 %a) {
%a_i = trunc i32 %a to i1
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0