forked from OSchip/llvm-project
[Hexagon] Widen loads and handle any-/sign-/zero-extensions
This commit is contained in:
parent
6352381039
commit
bb877d1af2
|
@ -1863,6 +1863,8 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
|
||||
case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
|
||||
case HexagonISD::VPACKL: return "HexagonISD::VPACKL";
|
||||
case HexagonISD::VUNPACK: return "HexagonISD::VUNPACK";
|
||||
case HexagonISD::VUNPACKU: return "HexagonISD::VUNPACKU";
|
||||
case HexagonISD::OP_END: break;
|
||||
}
|
||||
return nullptr;
|
||||
|
@ -2650,6 +2652,28 @@ HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
|
|||
llvm_unreachable("Invalid type for zero");
|
||||
}
|
||||
|
||||
/// Pad \p Val with undefined subvectors until it has the type \p ResTy.
///
/// \p Val and \p ResTy must share the same element type, and the element
/// count of \p ResTy must be a multiple of (and not smaller than) the element
/// count of \p Val. When both already have the same number of elements,
/// \p Val is returned unchanged. Otherwise the result is a CONCAT_VECTORS
/// node whose first operand is \p Val and whose remaining operands are
/// UNDEF values of Val's type.
SDValue
HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
      const {
  MVT InTy = ty(Val);
  assert(InTy.getVectorElementType() == ResTy.getVectorElementType());

  unsigned InLen = InTy.getVectorNumElements();
  unsigned OutLen = ResTy.getVectorNumElements();
  if (InLen == OutLen)
    return Val;

  assert(InLen < OutLen);
  assert(OutLen % InLen == 0);

  // The original value goes first, followed by as many undefined pieces
  // as are needed to fill up the wider type.
  SmallVector<SDValue, 4> Pieces = {Val};
  for (unsigned Left = OutLen / InLen; Left > 1; --Left)
    Pieces.push_back(DAG.getUNDEF(InTy));

  const SDLoc &dl(Val);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Pieces);
}
|
||||
|
||||
SDValue
|
||||
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
|
||||
MVT VecTy = ty(Op);
|
||||
|
|
|
@ -94,6 +94,8 @@ enum NodeType : unsigned {
|
|||
// the low halfwords and pack them into the first 32
|
||||
// halfwords of the output. The rest of the output is
|
||||
// unspecified.
|
||||
VUNPACK, // Unpacking into low elements with sign extension.
|
||||
VUNPACKU, // Unpacking into low elements with zero extension.
|
||||
OP_END
|
||||
};
|
||||
|
||||
|
@ -367,6 +369,7 @@ private:
|
|||
SDValue contractPredicate(SDValue Vec64, const SDLoc &dl,
|
||||
SelectionDAG &DAG) const;
|
||||
SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG) const;
|
||||
|
||||
bool isUndef(SDValue Op) const {
|
||||
if (Op.isMachineOpcode())
|
||||
|
@ -481,7 +484,9 @@ private:
|
|||
|
||||
SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
std::pair<const TargetRegisterClass*, uint8_t>
|
||||
|
|
|
@ -234,8 +234,12 @@ HexagonTargetLowering::initializeHVXLowering() {
|
|||
MVT VecTy = MVT::getVectorVT(ElemTy, N);
|
||||
auto Action = getPreferredVectorAction(VecTy);
|
||||
if (Action == TargetLoweringBase::TypeWidenVector) {
|
||||
setOperationAction(ISD::STORE, VecTy, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, VecTy, Custom);
|
||||
setOperationAction(ISD::LOAD, VecTy, Custom);
|
||||
setOperationAction(ISD::STORE, VecTy, Custom);
|
||||
setOperationAction(ISD::TRUNCATE, VecTy, Custom);
|
||||
setOperationAction(ISD::ANY_EXTEND, VecTy, Custom);
|
||||
setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom);
|
||||
setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1886,6 +1890,38 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
|
|||
llvm_unreachable(Name.c_str());
|
||||
}
|
||||
|
||||
/// Replace an unindexed load of a vector that is shorter than an HVX
/// register with a masked load of a full-length byte vector.
///
/// The mask is produced by V6_pred_scalar2 from the store size (in bytes) of
/// the original result type, which must be strictly less than the hardware
/// vector length (see the "vsetq(v1) prerequisite" assertion). The pass-through
/// value of the masked load is UNDEF. The loaded byte vector is then
/// re-interpreted with the element type of the original result via
/// opCastElem.
///
/// Indexed loads and loads of i1 vectors are not handled (asserted).
///
/// \returns a MERGE_VALUES node of {widened value, output chain of the new
/// masked load}.
SDValue
HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  auto *LoadN = cast<LoadSDNode>(Op.getNode());
  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening loads of i1 yet");

  SDValue Chain = LoadN->getChain();
  SDValue Base = LoadN->getBasePtr();
  // The load is unindexed, so the offset operand carries no information.
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  MVT ResTy = ty(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned ResLen = ResTy.getStoreSize();
  assert(ResLen < HwLen && "vsetq(v1) prerequisite");

  // Predicate derived from the byte count of the original type.
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                          {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);

  // Load a whole register worth of bytes; the memory operand is re-created
  // from the original one with offset 0 and size HwLen.
  MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);

  SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
                                   DAG.getUNDEF(LoadTy), LoadTy, MemOp,
                                   ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
  SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
  // Hand back the chain produced by the new load (result #1), not the chain
  // that fed into it: users of the original load's chain must stay ordered
  // after the memory access.
  return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
}
|
||||
|
||||
SDValue
|
||||
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
|
||||
const SDLoc &dl(Op);
|
||||
|
@ -1912,12 +1948,45 @@ HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
|
||||
MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
|
||||
SDValue StoreQ = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
|
||||
{DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
|
||||
SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
|
||||
{DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
auto *MOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
|
||||
return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, StoreQ, ty(Value),
|
||||
MOp, ISD::UNINDEXED, false, false);
|
||||
auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
|
||||
return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
|
||||
MemOp, ISD::UNINDEXED, false, false);
|
||||
}
|
||||
|
||||
/// Lower an any-/sign-/zero-extend whose operand and/or result is being
/// widened to an HVX type.
///
/// The operand is padded with undefined elements up to its widened type
/// (see appendUndef), and the extension is expressed as a HexagonISD::VUNPACK
/// (for SIGN_EXTEND) or HexagonISD::VUNPACKU (for every other opcode that
/// reaches this function) producing the widened result type.
///
/// \returns the widened node, or an empty SDValue when either the operand or
/// the result type is not accepted by isHVXElementType.
SDValue
HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);

  SDValue Src = Op.getOperand(0);
  MVT DstTy = ty(Op);
  MVT SrcTy = ty(Src);
  if (!(Subtarget.isHVXElementType(SrcTy) &&
        Subtarget.isHVXElementType(DstTy)))
    return SDValue();

  //  .-res, op->   ScalarVec       Illegal   HVX
  //    Scalar        ok              -        -
  //    Illegal     widen(insert)   widen      -
  //    HVX           -             widen      ok

  // Width of one HVX register in bits.
  const unsigned HwBits = 8 * Subtarget.getVectorLength();

  // Scale the element count of Ty so that the vector fills a whole HVX
  // register. Types at least as wide as the register are left alone.
  auto widenTy = [HwBits](MVT Ty) {
    unsigned Bits = Ty.getSizeInBits();
    unsigned Scale = 1;
    if (HwBits > Bits)
      Scale = HwBits / Bits;
    return MVT::getVectorVT(Ty.getVectorElementType(),
                            Ty.getVectorNumElements() * Scale);
  };

  // Only a sign-extend needs the signed unpack; everything else uses the
  // unsigned variant.
  unsigned UnpackOpc = HexagonISD::VUNPACKU;
  if (Op.getOpcode() == ISD::SIGN_EXTEND)
    UnpackOpc = HexagonISD::VUNPACK;

  SDValue WideSrc = appendUndef(Src, widenTy(SrcTy), DAG);
  return DAG.getNode(UnpackOpc, dl, widenTy(DstTy), WideSrc);
}
|
||||
|
||||
SDValue
|
||||
|
@ -1931,10 +2000,10 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
|
|||
if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
|
||||
return SDValue();
|
||||
|
||||
// .-res, op-> Scalar Illegal HVX
|
||||
// Scalar ok extract(widen) -
|
||||
// Illegal - widen widen
|
||||
// HVX - - ok
|
||||
// .-res, op-> ScalarVec Illegal HVX
|
||||
// Scalar ok extract(widen) -
|
||||
// Illegal - widen widen
|
||||
// HVX - - ok
|
||||
|
||||
auto getFactor = [HwWidth](MVT Ty) {
|
||||
unsigned Width = Ty.getSizeInBits();
|
||||
|
@ -1952,17 +2021,13 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
|
|||
|
||||
assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");
|
||||
|
||||
MVT WideOpTy = getWideTy(OpTy);
|
||||
SmallVector<SDValue, 4> Concats = {Op0};
|
||||
for (int i = 0, e = getFactor(OpTy) - 1; i != e; ++i)
|
||||
Concats.push_back(DAG.getUNDEF(OpTy));
|
||||
|
||||
SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideOpTy, Concats);
|
||||
SDValue V = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Cat);
|
||||
SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
|
||||
SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
|
||||
WideOp);
|
||||
// If the original result wasn't legal and was supposed to be widened,
|
||||
// we're done.
|
||||
if (shouldWidenToHvx(ResTy, DAG))
|
||||
return V;
|
||||
return WideRes;
|
||||
|
||||
// The original result type wasn't meant to be widened to HVX, so
|
||||
// leave it as it is. Standard legalization should be able to deal
|
||||
|
@ -1970,7 +2035,7 @@ HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
|
|||
// node).
|
||||
assert(ResTy.isVector());
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
|
||||
{V, getZero(dl, MVT::i32, DAG)});
|
||||
{WideRes, getZero(dl, MVT::i32, DAG)});
|
||||
}
|
||||
|
||||
SDValue
|
||||
|
@ -2053,12 +2118,18 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
|
|||
SDValue Op(N, 0);
|
||||
|
||||
switch (Opc) {
|
||||
case ISD::TRUNCATE: {
|
||||
case ISD::ANY_EXTEND:
|
||||
case ISD::SIGN_EXTEND:
|
||||
case ISD::ZERO_EXTEND:
|
||||
assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?");
|
||||
if (SDValue T = WidenHvxExtend(Op, DAG))
|
||||
Results.push_back(T);
|
||||
break;
|
||||
case ISD::TRUNCATE:
|
||||
assert(shouldWidenToHvx(ty(Op.getOperand(0)), DAG) && "Not widening?");
|
||||
if (SDValue T = WidenHvxTruncate(Op, DAG))
|
||||
Results.push_back(T);
|
||||
break;
|
||||
}
|
||||
case ISD::STORE: {
|
||||
assert(shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG) &&
|
||||
"Not widening?");
|
||||
|
@ -2089,11 +2160,25 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
|
|||
unsigned Opc = N->getOpcode();
|
||||
SDValue Op(N, 0);
|
||||
switch (Opc) {
|
||||
case ISD::TRUNCATE: {
|
||||
case ISD::ANY_EXTEND:
|
||||
case ISD::SIGN_EXTEND:
|
||||
case ISD::ZERO_EXTEND:
|
||||
assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
|
||||
if (SDValue T = WidenHvxExtend(Op, DAG))
|
||||
Results.push_back(T);
|
||||
break;
|
||||
case ISD::TRUNCATE:
|
||||
assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
|
||||
if (SDValue T = WidenHvxTruncate(Op, DAG))
|
||||
Results.push_back(T);
|
||||
break;
|
||||
case ISD::LOAD: {
|
||||
assert(shouldWidenToHvx(ty(Op), DAG) && "Not widening?");
|
||||
SDValue Load = WidenHvxLoad(Op, DAG);
|
||||
assert(Load->getOpcode() == ISD::MERGE_VALUES);
|
||||
Results.push_back(Load.getOperand(0));
|
||||
Results.push_back(Load.getOperand(1));
|
||||
break;
|
||||
}
|
||||
case ISD::BITCAST:
|
||||
if (isHvxBoolTy(ty(N->getOperand(0)))) {
|
||||
|
|
|
@ -41,6 +41,8 @@ def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>;
|
|||
def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>;
|
||||
def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
|
||||
def HexagonVPACKL: SDNode<"HexagonISD::VPACKL", SDTVecUnaryOp>;
|
||||
def HexagonVUNPACK: SDNode<"HexagonISD::VUNPACK", SDTVecUnaryOp>;
|
||||
def HexagonVUNPACKU: SDNode<"HexagonISD::VUNPACKU", SDTVecUnaryOp>;
|
||||
|
||||
def vzero: PatFrag<(ops), (HexagonVZERO)>;
|
||||
def qtrue: PatFrag<(ops), (HexagonQTRUE)>;
|
||||
|
@ -48,8 +50,10 @@ def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
|
|||
def qcat: PatFrag<(ops node:$Qs, node:$Qt),
|
||||
(HexagonQCAT node:$Qs, node:$Qt)>;
|
||||
|
||||
def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
|
||||
def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
|
||||
def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
|
||||
def vpackl: PatFrag<(ops node:$Vs), (HexagonVPACKL node:$Vs)>;
|
||||
def vunpack: PatFrag<(ops node:$Vs), (HexagonVUNPACK node:$Vs)>;
|
||||
def vunpacku: PatFrag<(ops node:$Vs), (HexagonVUNPACKU node:$Vs)>;
|
||||
|
||||
def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>;
|
||||
def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
|
||||
|
@ -416,6 +420,20 @@ let Predicates = [UseHVX] in {
|
|||
def: Pat<(VecI8 (vpackl HWI32:$Vs)), (V6_vdealb4w (HiVec $Vs), (LoVec $Vs))>;
|
||||
def: Pat<(VecI16 (vpackl HWI32:$Vs)), (V6_vpackeh (HiVec $Vs), (LoVec $Vs))>;
|
||||
|
||||
def: Pat<(VecI16 (vunpack HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
|
||||
def: Pat<(VecI32 (vunpack HVI8:$Vs)), (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
|
||||
def: Pat<(VecI32 (vunpack HVI16:$Vs)), (LoVec (VSxth $Vs))>;
|
||||
def: Pat<(VecPI16 (vunpack HVI8:$Vs)), (VSxtb $Vs)>;
|
||||
def: Pat<(VecPI32 (vunpack HVI8:$Vs)), (VSxth (LoVec (VSxtb $Vs)))>;
|
||||
// VSxth expands to V6_vunpackh, so the source operand is a vector of
// halfwords (same operand type as the VecI32 pattern that uses VSxth).
def: Pat<(VecPI32 (vunpack HVI16:$Vs)), (VSxth $Vs)>;
|
||||
|
||||
def: Pat<(VecI16 (vunpacku HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
|
||||
def: Pat<(VecI32 (vunpacku HVI8:$Vs)), (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
|
||||
def: Pat<(VecI32 (vunpacku HVI16:$Vs)), (LoVec (VZxth $Vs))>;
|
||||
def: Pat<(VecPI16 (vunpacku HVI8:$Vs)), (VZxtb $Vs)>;
|
||||
def: Pat<(VecPI32 (vunpacku HVI8:$Vs)), (VZxth (LoVec (VZxtb $Vs)))>;
|
||||
// VZxth is applied to a halfword vector in the matching VecI32 pattern;
// use the same HVI16 source operand type for the vector-pair result.
def: Pat<(VecPI32 (vunpacku HVI16:$Vs)), (VZxth $Vs)>;
|
||||
|
||||
def: Pat<(VecI16 (bswap HVI16:$Vs)),
|
||||
(V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>;
|
||||
def: Pat<(VecI32 (bswap HVI32:$Vs)),
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
; RUN: llc -march=hexagon -hexagon-hvx-widen=32 < %s | FileCheck %s
|
||||
|
||||
; v32i8 -> v32i16
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: r[[R0:[0-9]+]] = #64
|
||||
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
|
||||
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
|
||||
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
|
||||
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
|
||||
define void @f0(<32 x i8>* %a0, <32 x i16>* %a1) #0 {
|
||||
%v0 = load <32 x i8>, <32 x i8>* %a0, align 128
|
||||
%v1 = sext <32 x i8> %v0 to <32 x i16>
|
||||
store <32 x i16> %v1, <32 x i16>* %a1, align 128
|
||||
ret void
|
||||
}
|
||||
|
||||
; v32i8 -> v32i32
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
|
||||
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
|
||||
; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
|
||||
; CHECK: vmem(r1+#0) = v[[V4]]
|
||||
define void @f1(<32 x i8>* %a0, <32 x i32>* %a1) #0 {
|
||||
%v0 = load <32 x i8>, <32 x i8>* %a0, align 128
|
||||
%v1 = sext <32 x i8> %v0 to <32 x i32>
|
||||
store <32 x i32> %v1, <32 x i32>* %a1, align 128
|
||||
ret void
|
||||
}
|
||||
|
||||
; v64i8 -> v64i16
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
|
||||
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
|
||||
; CHECK: vmem(r1+#0) = v[[V2]]
|
||||
define void @f2(<64 x i8>* %a0, <64 x i16>* %a1) #0 {
|
||||
%v0 = load <64 x i8>, <64 x i8>* %a0, align 128
|
||||
%v1 = sext <64 x i8> %v0 to <64 x i16>
|
||||
store <64 x i16> %v1, <64 x i16>* %a1, align 128
|
||||
ret void
|
||||
}
|
||||
|
||||
; v64i8 -> v64i32
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
|
||||
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
|
||||
; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
|
||||
; CHECK-DAG: vmem(r1+#0) = v[[V4]]
|
||||
; CHECK-DAG: vmem(r1+#1) = v[[V3]]
|
||||
define void @f3(<64 x i8>* %a0, <64 x i32>* %a1) #0 {
|
||||
%v0 = load <64 x i8>, <64 x i8>* %a0, align 128
|
||||
%v1 = sext <64 x i8> %v0 to <64 x i32>
|
||||
store <64 x i32> %v1, <64 x i32>* %a1, align 128
|
||||
ret void
|
||||
}
|
||||
|
||||
; v16i16 -> v16i32
|
||||
; CHECK-LABEL: f4:
|
||||
; CHECK: r[[R0:[0-9]+]] = #64
|
||||
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
|
||||
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].w = vunpack(v[[V0]].h)
|
||||
; CHECK: q[[Q0:[0-3]]] = vsetq(r[[R0]])
|
||||
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V2]]
|
||||
define void @f4(<16 x i16>* %a0, <16 x i32>* %a1) #0 {
|
||||
%v0 = load <16 x i16>, <16 x i16>* %a0, align 128
|
||||
%v1 = sext <16 x i16> %v0 to <16 x i32>
|
||||
store <16 x i32> %v1, <16 x i32>* %a1, align 128
|
||||
ret void
|
||||
}
|
||||
|
||||
; v32i16 -> v32i32
|
||||
; CHECK-LABEL: f5:
|
||||
; CHECK: v[[V0:[0-9]+]] = vmem(r0+#0)
|
||||
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].w = vunpack(v[[V0]].h)
|
||||
; CHECK: vmem(r1+#0) = v[[V2]]
|
||||
define void @f5(<32 x i16>* %a0, <32 x i32>* %a1) #0 {
|
||||
%v0 = load <32 x i16>, <32 x i16>* %a0, align 128
|
||||
%v1 = sext <32 x i16> %v0 to <32 x i32>
|
||||
store <32 x i32> %v1, <32 x i32>* %a1, align 128
|
||||
ret void
|
||||
}
|
||||
|
||||
; v8i8 -> v8i32
|
||||
; CHECK-LABEL: f6:
|
||||
; CHECK: r[[R0:[0-9]+]]:[[R1:[0-9]+]] = memd(r0+#0)
|
||||
; CHECK-DAG: v[[V0:[0-9]+]].w = vinsert(r[[R0]])
|
||||
; CHECK-DAG: v[[V0]].w = vinsert(r[[R1]])
|
||||
; CHECK-DAG: q[[Q0:[0-3]]] = vsetq
|
||||
; CHECK: v[[V1:[0-9]+]]:[[V2:[0-9]+]].h = vunpack(v[[V0]].b)
|
||||
; CHECK: v[[V3:[0-9]+]]:[[V4:[0-9]+]].w = vunpack(v[[V2]].h)
|
||||
; CHECK: if (q[[Q0]]) vmem(r1+#0) = v[[V4]]
|
||||
define void @f6(<8 x i8>* %a0, <8 x i32>* %a1) #0 {
|
||||
%v0 = load <8 x i8>, <8 x i8>* %a0, align 128
|
||||
%v1 = sext <8 x i8> %v0 to <8 x i32>
|
||||
store <8 x i32> %v1, <8 x i32>* %a1, align 128
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "target-cpu"="hexagonv65" "target-features"="+hvx,+hvx-length128b,-packets" }
|
||||
|
Loading…
Reference in New Issue