forked from OSchip/llvm-project
AVX-512: fixed algorithm of building vectors of i1 elements
Fixed extract/insert of i1 elements. Loads and zero-extending loads of i1 now apply "and $1, %reg" to the loaded value to prevent picking up garbage in the upper bits. Added a bunch of new tests. llvm-svn: 237793
This commit is contained in:
parent
69c6008e49
commit
f61727d880
|
@ -1471,6 +1471,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
|
||||
|
||||
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
|
||||
const MVT VT = (MVT::SimpleValueType)i;
|
||||
|
@ -1500,6 +1504,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
|||
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
|
||||
|
||||
setOperationAction(ISD::AND, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::OR, MVT::v8i32, Legal);
|
||||
|
@ -5188,12 +5194,27 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
|
|||
return NV;
|
||||
}
|
||||
|
||||
/// \brief Pack a constant vXi1 BUILD_VECTOR into a single integer constant.
///
/// Bit \c idx of the result holds the value of operand \c idx of \p Op;
/// UNDEF operands leave their bit at 0. The result type is an integer as wide
/// as the vector, but never narrower than 8 bits.
///
/// \param Op  BUILD_VECTOR of constant nodes with 1-bit elements (asserted).
/// \param DAG DAG used to create the resulting constant node.
/// \returns the integer constant node holding the packed bits.
static SDValue ConvertI1VectorToInterger(SDValue Op, SelectionDAG &DAG) {
|
||||
assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
|
||||
Op.getScalarValueSizeInBits() == 1 &&
|
||||
"Can not convert non-constant vector");
|
||||
uint64_t Immediate = 0;
|
||||
// Accumulate one bit per operand, element 0 in the least significant bit.
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
|
||||
SDValue In = Op.getOperand(idx);
|
||||
// UNDEF elements are skipped, so their bit stays 0 in the immediate.
if (In.getOpcode() != ISD::UNDEF)
|
||||
Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
|
||||
}
|
||||
SDLoc dl(Op);
|
||||
// Clamp the width to at least 8 bits: vectors with fewer than 8 elements
// still produce an i8 immediate, so the result may be wider than the vector.
MVT VT =
|
||||
MVT::getIntegerVT(std::max((int)Op.getValueType().getSizeInBits(), 8));
|
||||
return DAG.getConstant(Immediate, dl, VT);
|
||||
}
|
||||
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
|
||||
SDValue
|
||||
X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
MVT VT = Op.getSimpleValueType();
|
||||
assert((VT.getVectorElementType() == MVT::i1) && (VT.getSizeInBits() <= 16) &&
|
||||
assert((VT.getVectorElementType() == MVT::i1) &&
|
||||
"Unexpected type in LowerBUILD_VECTORvXi1!");
|
||||
|
||||
SDLoc dl(Op);
|
||||
|
@ -5209,62 +5230,69 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
|
|||
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
|
||||
}
|
||||
|
||||
bool AllContants = true;
|
||||
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
|
||||
SDValue Imm = ConvertI1VectorToInterger(Op, DAG);
|
||||
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Imm);
|
||||
SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
||||
// Vector has one or more non-const elements
|
||||
uint64_t Immediate = 0;
|
||||
int NonConstIdx = -1;
|
||||
SmallVector<unsigned, 16> NonConstIdx;
|
||||
bool IsSplat = true;
|
||||
unsigned NumNonConsts = 0;
|
||||
unsigned NumConsts = 0;
|
||||
bool HasConstElts = false;
|
||||
int SplatIdx = -1;
|
||||
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
|
||||
SDValue In = Op.getOperand(idx);
|
||||
if (In.getOpcode() == ISD::UNDEF)
|
||||
continue;
|
||||
if (!isa<ConstantSDNode>(In)) {
|
||||
AllContants = false;
|
||||
NonConstIdx = idx;
|
||||
NumNonConsts++;
|
||||
} else {
|
||||
NumConsts++;
|
||||
if (cast<ConstantSDNode>(In)->getZExtValue())
|
||||
Immediate |= (1ULL << idx);
|
||||
if (!isa<ConstantSDNode>(In))
|
||||
NonConstIdx.push_back(idx);
|
||||
else {
|
||||
Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
|
||||
HasConstElts = true;
|
||||
}
|
||||
if (In != Op.getOperand(0))
|
||||
if (SplatIdx == -1)
|
||||
SplatIdx = idx;
|
||||
else if (In != Op.getOperand(SplatIdx))
|
||||
IsSplat = false;
|
||||
}
|
||||
|
||||
if (AllContants) {
|
||||
SDValue FullMask = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1,
|
||||
DAG.getConstant(Immediate, dl, MVT::i16));
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, FullMask,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
// for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
|
||||
if (IsSplat)
|
||||
return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx),
|
||||
DAG.getConstant(1, dl, VT),
|
||||
DAG.getConstant(0, dl, VT));
|
||||
|
||||
// insert elements one by one
|
||||
SDValue DstVec;
|
||||
SDValue Imm;
|
||||
if (Immediate) {
|
||||
MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
|
||||
Imm = DAG.getConstant(Immediate, dl, ImmVT);
|
||||
}
|
||||
else if (HasConstElts)
|
||||
Imm = DAG.getConstant(0, dl, VT);
|
||||
else
|
||||
Imm = DAG.getUNDEF(VT);
|
||||
if (Imm.getValueSizeInBits() == VT.getSizeInBits())
|
||||
DstVec = DAG.getNode(ISD::BITCAST, dl, VT, Imm);
|
||||
else {
|
||||
SDValue ExtVec = DAG.getNode(ISD::BITCAST, dl, MVT::v8i1, Imm);
|
||||
DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
|
||||
DAG.getIntPtrConstant(0, dl));
|
||||
}
|
||||
|
||||
if (NumNonConsts == 1 && NonConstIdx != 0) {
|
||||
SDValue DstVec;
|
||||
if (NumConsts) {
|
||||
SDValue VecAsImm = DAG.getConstant(Immediate, dl,
|
||||
MVT::getIntegerVT(VT.getSizeInBits()));
|
||||
DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
|
||||
}
|
||||
else
|
||||
DstVec = DAG.getUNDEF(VT);
|
||||
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
|
||||
Op.getOperand(NonConstIdx),
|
||||
DAG.getIntPtrConstant(NonConstIdx, dl));
|
||||
for (unsigned i = 0; i < NonConstIdx.size(); ++i) {
|
||||
unsigned InsertIdx = NonConstIdx[i];
|
||||
DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
|
||||
Op.getOperand(InsertIdx),
|
||||
DAG.getIntPtrConstant(InsertIdx, dl));
|
||||
}
|
||||
if (!IsSplat && (NonConstIdx != 0))
|
||||
llvm_unreachable("Unsupported BUILD_VECTOR operation");
|
||||
MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
|
||||
SDValue Select;
|
||||
if (IsSplat)
|
||||
Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
|
||||
DAG.getConstant(-1, dl, SelectVT),
|
||||
DAG.getConstant(0, dl, SelectVT));
|
||||
else
|
||||
Select = DAG.getNode(ISD::SELECT, dl, SelectVT, Op.getOperand(0),
|
||||
DAG.getConstant((Immediate | 1), dl, SelectVT),
|
||||
DAG.getConstant(Immediate, dl, SelectVT));
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, Select);
|
||||
return DstVec;
|
||||
}
|
||||
|
||||
/// \brief Return true if \p N implements a horizontal binop and return the
|
||||
|
@ -10670,15 +10698,11 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
|
||||
if (IdxVal)
|
||||
EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
if (Vec.getOpcode() == ISD::UNDEF)
|
||||
return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i8));
|
||||
const TargetRegisterClass* rc = getRegClassFor(VecVT);
|
||||
unsigned MaxSift = rc->getSize()*8 - 1;
|
||||
EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
|
||||
DAG.getConstant(MaxSift, dl, MVT::i8));
|
||||
EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec,
|
||||
DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
|
||||
return EltInVec;
|
||||
return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
|
||||
}
|
||||
|
||||
|
@ -13623,6 +13647,29 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
|
|||
}
|
||||
}
|
||||
|
||||
if (VT.isVector() && VT.getScalarType() == MVT::i1) {
|
||||
SDValue Op1Scalar;
|
||||
if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
|
||||
Op1Scalar = ConvertI1VectorToInterger(Op1, DAG);
|
||||
else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0))
|
||||
Op1Scalar = Op1.getOperand(0);
|
||||
SDValue Op2Scalar;
|
||||
if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode()))
|
||||
Op2Scalar = ConvertI1VectorToInterger(Op2, DAG);
|
||||
else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0))
|
||||
Op2Scalar = Op2.getOperand(0);
|
||||
if (Op1Scalar.getNode() && Op2Scalar.getNode()) {
|
||||
SDValue newSelect = DAG.getNode(ISD::SELECT, DL,
|
||||
Op1Scalar.getValueType(),
|
||||
Cond, Op1Scalar, Op2Scalar);
|
||||
if (newSelect.getValueSizeInBits() == VT.getSizeInBits())
|
||||
return DAG.getNode(ISD::BITCAST, DL, VT, newSelect);
|
||||
SDValue ExtVec = DAG.getNode(ISD::BITCAST, DL, MVT::v8i1, newSelect);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec,
|
||||
DAG.getIntPtrConstant(0, DL));
|
||||
}
|
||||
}
|
||||
|
||||
if (VT == MVT::v4i1 || VT == MVT::v2i1) {
|
||||
SDValue zeroConst = DAG.getIntPtrConstant(0, DL);
|
||||
Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
|
||||
|
@ -20728,7 +20775,8 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
|
|||
if (!InVec.hasOneUse())
|
||||
return SDValue();
|
||||
EVT BCVT = InVec.getOperand(0).getValueType();
|
||||
if (BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
|
||||
if (!BCVT.isVector() ||
|
||||
BCVT.getVectorNumElements() != OriginalVT.getVectorNumElements())
|
||||
return SDValue();
|
||||
InVec = InVec.getOperand(0);
|
||||
}
|
||||
|
@ -20833,7 +20881,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
|
|||
return NewOp;
|
||||
|
||||
SDValue InputVector = N->getOperand(0);
|
||||
|
||||
SDLoc dl(InputVector);
|
||||
// Detect mmx to i32 conversion through a v2i32 elt extract.
|
||||
if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
|
||||
N->getValueType(0) == MVT::i32 &&
|
||||
|
@ -20858,6 +20906,18 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
|
|||
MMXSrcOp.getOperand(0));
|
||||
}
|
||||
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if (VT == MVT::i1 && dyn_cast<ConstantSDNode>(N->getOperand(1)) &&
|
||||
InputVector.getOpcode() == ISD::BITCAST &&
|
||||
dyn_cast<ConstantSDNode>(InputVector.getOperand(0))) {
|
||||
uint64_t ExtractedElt =
|
||||
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
|
||||
uint64_t InputValue =
|
||||
cast<ConstantSDNode>(InputVector.getOperand(0))->getZExtValue();
|
||||
uint64_t Res = (InputValue >> ExtractedElt) & 1;
|
||||
return DAG.getConstant(Res, dl, MVT::i1);
|
||||
}
|
||||
// Only operate on vectors of 4 elements, where the alternative shuffling
|
||||
// gets to be more expensive.
|
||||
if (InputVector.getValueType() != MVT::v4i32)
|
||||
|
@ -20903,7 +20963,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
|
|||
// otherwise bounce the vector off the cache.
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
SDValue Vals[4];
|
||||
SDLoc dl(InputVector);
|
||||
|
||||
if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) {
|
||||
SDValue Cst = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, InputVector);
|
||||
|
@ -23606,6 +23665,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||
const X86Subtarget *Subtarget) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc dl(N);
|
||||
|
||||
// (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
|
||||
// (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
|
||||
|
@ -23613,7 +23673,6 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||
// from AH (which we otherwise need to do contortions to access).
|
||||
if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
|
||||
N0.getValueType() == MVT::i8 && VT == MVT::i32) {
|
||||
SDLoc dl(N);
|
||||
SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
|
||||
SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
|
||||
N0.getOperand(0), N0.getOperand(1));
|
||||
|
@ -23621,8 +23680,15 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
|
|||
return R.getValue(1);
|
||||
}
|
||||
|
||||
if (!DCI.isBeforeLegalizeOps())
|
||||
if (!DCI.isBeforeLegalizeOps()) {
|
||||
if (N0.getValueType() == MVT::i1) {
|
||||
SDValue Zero = DAG.getConstant(0, dl, VT);
|
||||
SDValue AllOnes =
|
||||
DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT);
|
||||
return DAG.getNode(ISD::SELECT, dl, VT, N0, AllOnes, Zero);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
if (!Subtarget->hasFp256())
|
||||
return SDValue();
|
||||
|
|
|
@ -1855,7 +1855,9 @@ let Predicates = [HasAVX512] in {
|
|||
def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
|
||||
(KMOVWmk addr:$dst, VK16:$src)>;
|
||||
def : Pat<(i1 (load addr:$src)),
|
||||
(COPY_TO_REGCLASS (KMOVWkm addr:$src), VK1)>;
|
||||
(COPY_TO_REGCLASS (AND16ri (i16 (SUBREG_TO_REG (i32 0),
|
||||
(MOV8rm addr:$src), sub_8bit)),
|
||||
(i16 1)), VK1)>;
|
||||
def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
|
||||
(KMOVWkm addr:$src)>;
|
||||
}
|
||||
|
@ -1920,13 +1922,13 @@ let Predicates = [HasAVX512, NoDQI] in {
|
|||
// GR from/to 8-bit mask without native support
|
||||
def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
|
||||
(COPY_TO_REGCLASS
|
||||
(KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
|
||||
VK8)>;
|
||||
(KMOVWkr (MOVZX32rr8 GR8 :$src)), VK8)>;
|
||||
def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
|
||||
(EXTRACT_SUBREG
|
||||
(KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
|
||||
sub_8bit)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512] in {
|
||||
def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
|
||||
(COPY_TO_REGCLASS VK16:$src, VK1)>;
|
||||
|
|
|
@ -1064,11 +1064,12 @@ defm : CMOVmr<X86_COND_O , CMOVNO16rm, CMOVNO32rm, CMOVNO64rm>;
|
|||
defm : CMOVmr<X86_COND_NO, CMOVO16rm , CMOVO32rm , CMOVO64rm>;
|
||||
|
||||
// zextload bool -> zextload byte
|
||||
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
|
||||
def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
|
||||
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
|
||||
def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>;
|
||||
def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>;
|
||||
def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>;
|
||||
def : Pat<(zextloadi64i1 addr:$src),
|
||||
(SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
(AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>;
|
||||
|
||||
// extload bool -> extload byte
|
||||
// When extloading from 16-bit and smaller memory locations into 64-bit
|
||||
|
|
|
@ -137,10 +137,12 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
|
|||
}
|
||||
|
||||
;CHECK-LABEL: test13
|
||||
;CHECK: cmpl
|
||||
;CHECK: sbbl
|
||||
;CHECK: orl $65532
|
||||
;CHECK: ret
|
||||
;CHECK: cmpl %esi, %edi
|
||||
;CHECK: setb %al
|
||||
;CHECK: andl $1, %eax
|
||||
;CHECK: kmovw %eax, %k0
|
||||
;CHECK: movw $-4
|
||||
;CHECK: korw
|
||||
define i16 @test13(i32 %a, i32 %b) {
|
||||
%cmp_res = icmp ult i32 %a, %b
|
||||
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
|
||||
|
@ -167,19 +169,22 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
|
|||
}
|
||||
|
||||
;CHECK-LABEL: test15
|
||||
;CHECK: kshiftlw
|
||||
;CHECK: kmovw
|
||||
;CHECK: ret
|
||||
;CHECK: movb (%rdi), %al
|
||||
;CHECK: andb $1, %al
|
||||
;CHECK: movw $-1, %ax
|
||||
;CHECK: cmovew
|
||||
define i16 @test15(i1 *%addr) {
|
||||
%x = load i1 , i1 * %addr, align 128
|
||||
%x = load i1 , i1 * %addr, align 1
|
||||
%x1 = insertelement <16 x i1> undef, i1 %x, i32 10
|
||||
%x2 = bitcast <16 x i1>%x1 to i16
|
||||
ret i16 %x2
|
||||
}
|
||||
|
||||
;CHECK-LABEL: test16
|
||||
;CHECK: kshiftlw
|
||||
;CHECK: kshiftrw
|
||||
;CHECK: movb (%rdi), %al
|
||||
;CHECK: andw $1, %ax
|
||||
;CHECK: kmovw
|
||||
;CHECK: kshiftlw $10
|
||||
;CHECK: korw
|
||||
;CHECK: ret
|
||||
define i16 @test16(i1 *%addr, i16 %a) {
|
||||
|
@ -191,11 +196,11 @@ define i16 @test16(i1 *%addr, i16 %a) {
|
|||
}
|
||||
|
||||
;CHECK-LABEL: test17
|
||||
;KNL: kshiftlw
|
||||
;KNL: kshiftrw
|
||||
;KNL: movb (%rdi), %al
|
||||
;KNL: andw $1, %ax
|
||||
;KNL: kshiftlw $4
|
||||
;KNL: korw
|
||||
;SKX: kshiftlb
|
||||
;SKX: kshiftrb
|
||||
;SKX: kshiftlb $4
|
||||
;SKX: korb
|
||||
;CHECK: ret
|
||||
define i8 @test17(i1 *%addr, i8 %a) {
|
||||
|
|
|
@ -191,7 +191,7 @@ false:
|
|||
|
||||
; SKX-LABEL: test7
|
||||
; SKX: vpmovw2m
|
||||
; SKX: kmovw %eax, %k1
|
||||
; SKX: kmovb %eax, %k1
|
||||
; SKX: korb
|
||||
|
||||
define void @test7(<8 x i1> %mask) {
|
||||
|
@ -282,3 +282,114 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
|
|||
ret <4 x i1>%c
|
||||
}
|
||||
|
||||
; Extract of a constant mask bit: 21845 is 0x5555, so element 0 is 1 and the
; select should fold to %x. The check below expects a plain move of %edi.
; KNL-LABEL: test12
|
||||
; KNL: movl %edi, %eax
|
||||
define i32 @test12(i32 %x, i32 %y) {
|
||||
%a = bitcast i16 21845 to <16 x i1>
|
||||
%b = extractelement <16 x i1> %a, i32 0
|
||||
%c = select i1 %b, i32 %x, i32 %y
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; Same as test12 but extracting element 3: bit 3 of 0x5555 is 0, so the
; select should fold to %y. The check below expects a plain move of %esi.
; KNL-LABEL: test13
|
||||
; KNL: movl %esi, %eax
|
||||
define i32 @test13(i32 %x, i32 %y) {
|
||||
%a = bitcast i16 21845 to <16 x i1>
|
||||
%b = extractelement <16 x i1> %a, i32 3
|
||||
%c = select i1 %b, i32 %x, i32 %y
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; Constant extract feeding a constant insert: bit 2 of 0x5555 is 1, and
; inserting it at index 1 of <1,0,0,1> gives <1,1,0,1>, i.e. the immediate
; 0b1011 = 11 that the check below expects to be loaded into the mask.
; SKX-LABEL: test14
|
||||
; SKX: movb $11, %al
|
||||
; SKX: kmovb %eax, %k0
|
||||
; SKX: vpmovm2d %k0, %xmm0
|
||||
|
||||
define <4 x i1> @test14() {
|
||||
%a = bitcast i16 21845 to <16 x i1>
|
||||
%b = extractelement <16 x i1> %a, i32 2
|
||||
%c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
|
||||
ret <4 x i1> %c
|
||||
}
|
||||
|
||||
; Scalar-condition select between two constant <16 x i1> masks (bitcasts of
; i16 21845 and i16 1). The check expects a 16-bit conditional move (cmovgw)
; for the signed greater-than compare.
; KNL-LABEL: test15
|
||||
; KNL: cmovgw
|
||||
define <16 x i1> @test15(i32 %x, i32 %y) {
|
||||
%a = bitcast i16 21845 to <16 x i1>
|
||||
%b = bitcast i16 1 to <16 x i1>
|
||||
%mask = icmp sgt i32 %x, %y
|
||||
%c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
|
||||
ret <16 x i1> %c
|
||||
}
|
||||
|
||||
; Insert a constant 'true' at index 5 of a <64 x i1> mask built from an i64,
; then sign-extend to bytes. The checks expect the new bit to be shifted left
; by 5 (kshiftlq $5) and OR-ed into the mask (korq) before vpmovm2b.
; SKX-LABEL: test16
|
||||
; SKX: kxnorw %k1, %k1, %k1
|
||||
; SKX: kshiftrw $15, %k1, %k1
|
||||
; SKX: kshiftlq $5, %k1, %k1
|
||||
; SKX: korq %k1, %k0, %k0
|
||||
; SKX: vpmovm2b %k0, %zmm0
|
||||
define <64 x i8> @test16(i64 %x) {
|
||||
%a = bitcast i64 %x to <64 x i1>
|
||||
%b = insertelement <64 x i1>%a, i1 true, i32 5
|
||||
%c = sext <64 x i1>%b to <64 x i8>
|
||||
ret <64 x i8>%c
|
||||
}
|
||||
|
||||
; Like test16, but the inserted bit is the non-constant result of
; 'icmp sgt %y, %z'. The checks expect the compare result to be masked with
; 'andl $1', moved to a mask register, shifted left by 5 and OR-ed in.
; SKX-LABEL: test17
|
||||
; SKX: setg %al
|
||||
; SKX: andl $1, %eax
|
||||
; SKX: kmovw %eax, %k1
|
||||
; SKX: kshiftlq $5, %k1, %k1
|
||||
; SKX: korq %k1, %k0, %k0
|
||||
; SKX: vpmovm2b %k0, %zmm0
|
||||
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
|
||||
%a = bitcast i64 %x to <64 x i1>
|
||||
%b = icmp sgt i32 %y, %z
|
||||
%c = insertelement <64 x i1>%a, i1 %b, i32 5
|
||||
%d = sext <64 x i1>%c to <64 x i8>
|
||||
ret <64 x i8>%d
|
||||
}
|
||||
|
||||
; Moves elements 8 and 9 of a <16 x i1> mask into positions 7 and 6 of an
; <8 x i1> mask. Only the label is checked here, with no instruction patterns;
; presumably this is a "compiles without crashing" test - TODO confirm.
; KNL-LABEL: test18
|
||||
define <8 x i1> @test18(i8 %a, i16 %y) {
|
||||
%b = bitcast i8 %a to <8 x i1>
|
||||
%b1 = bitcast i16 %y to <16 x i1>
|
||||
%el1 = extractelement <16 x i1>%b1, i32 8
|
||||
%el2 = extractelement <16 x i1>%b1, i32 9
|
||||
%c = insertelement <8 x i1>%b, i1 %el1, i32 7
|
||||
%d = insertelement <8 x i1>%c, i1 %el2, i32 6
|
||||
ret <8 x i1>%d
|
||||
}
|
||||
|
||||
; Shuffle whose only defined lanes all read element 2, i.e. a splat of one
; mask bit. The checks expect bit 2 to be isolated with a shift left by 13
; followed by a shift right by 15 on the 16-bit mask, then masked with
; 'andl $1' and tested.
; KNL-LABEL: test19
|
||||
; KNL: movzbl %dil, %eax
|
||||
; KNL: kmovw %eax, %k0
|
||||
; KNL: kshiftlw $13, %k0, %k0
|
||||
; KNL: kshiftrw $15, %k0, %k0
|
||||
; KNL: kmovw %k0, %eax
|
||||
; KNL: andl $1, %eax
|
||||
; KNL: testb %al, %al
|
||||
|
||||
define <8 x i1> @test19(i8 %a) {
|
||||
%b = bitcast i8 %a to <8 x i1>
|
||||
%c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef>
|
||||
ret <8 x i1> %c
|
||||
}
|
||||
|
||||
; Non-splat shuffle: lanes 1 and 6 read element 2, lane 4 reads element 3,
; and the rest are undef. The checks expect each source bit to be isolated
; (shift left by 13 or 12, then right by 15), shifted to its destination
; lane (1, 4 and 6), and combined with korw. The %y argument is unused.
; KNL-LABEL: test20
|
||||
; KNL: movzbl %dil, %eax
|
||||
; KNL: kmovw %eax, %k0
|
||||
; KNL: kshiftlw $13, %k0, %k1
|
||||
; KNL: kshiftrw $15, %k1, %k1
|
||||
; KNL: kshiftlw $12, %k0, %k0
|
||||
; KNL: kshiftrw $15, %k0, %k0
|
||||
; KNL: kshiftlw $4, %k0, %k0
|
||||
; KNL: kshiftlw $1, %k1, %k2
|
||||
; KNL: korw %k0, %k2, %k0
|
||||
; KNL: kshiftlw $6, %k1, %k1
|
||||
; KNL: korw %k1, %k0, %k1
|
||||
define <8 x i1> @test20(i8 %a, i16 %y) {
|
||||
%b = bitcast i8 %a to <8 x i1>
|
||||
%c = shufflevector < 8 x i1>%b, <8 x i1>undef, <8 x i32> <i32 undef, i32 2, i32 undef, i32 undef, i32 3, i32 undef, i32 2, i32 undef>
|
||||
ret <8 x i1> %c
|
||||
}
|
||||
|
|
|
@ -50,8 +50,10 @@ define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: select05
|
||||
; CHECK: kmovw %esi, %k0
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK: movzbl %sil, %eax
|
||||
; CHECK: kmovw %eax, %k0
|
||||
; CHECK: movzbl %dil, %eax
|
||||
; CHECK: kmovw %eax, %k1
|
||||
; CHECK-NEXT: korw %k1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
define i8 @select05(i8 %a.0, i8 %m) {
|
||||
|
@ -63,8 +65,10 @@ define i8 @select05(i8 %a.0, i8 %m) {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: select06
|
||||
; CHECK: kmovw %esi, %k0
|
||||
; CHECK-NEXT: kmovw %edi, %k1
|
||||
; CHECK: movzbl %sil, %eax
|
||||
; CHECK: kmovw %eax, %k0
|
||||
; CHECK: movzbl %dil, %eax
|
||||
; CHECK: kmovw %eax, %k1
|
||||
; CHECK-NEXT: kandw %k1, %k0, %k0
|
||||
; CHECK-NEXT: kmovw %k0, %eax
|
||||
define i8 @select06(i8 %a.0, i8 %m) {
|
||||
|
@ -76,9 +80,12 @@ define i8 @select06(i8 %a.0, i8 %m) {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: select07
|
||||
; CHECK-DAG: kmovw %edx, %k0
|
||||
; CHECK-DAG: kmovw %edi, %k1
|
||||
; CHECK-DAG: kmovw %esi, %k2
|
||||
; CHECK-DAG: movzbl %dl, %eax
|
||||
; CHECK-DAG: kmovw %eax, %k0
|
||||
; CHECK-DAG: movzbl %dil, %eax
|
||||
; CHECK-DAG: kmovw %eax, %k1
|
||||
; CHECK-DAG: movzbl %sil, %eax
|
||||
; CHECK-DAG: kmovw %eax, %k2
|
||||
; CHECK: kandw %k0, %k1, %k1
|
||||
; CHECK-NEXT: knotw %k0, %k0
|
||||
; CHECK-NEXT: kandw %k0, %k2, %k0
|
||||
|
|
|
@ -156,10 +156,9 @@ define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
|
|||
}
|
||||
|
||||
; CHECK-LABEL: trunc_i32_to_i1
|
||||
; CHECK: testb
|
||||
; CHECK: setne
|
||||
; CKECK: orl
|
||||
; CHECK: ret
|
||||
; CHECK: movw $-4, %ax
|
||||
; CHECK: kmovw %eax, %k1
|
||||
; CKECK: korw
|
||||
define i16 @trunc_i32_to_i1(i32 %a) {
|
||||
%a_i = trunc i32 %a to i1
|
||||
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
|
||||
|
|
Loading…
Reference in New Issue