[Hexagon] Improve HVX instruction selection (bitcast, vsplat)

There was some unfortunate interaction between VSPLAT and BITCAST
related to the selection of constant vectors (coming from selecting
shuffles). Introduce VSPLATW, which always splats a 32-bit word and
can have an arbitrary result type (to avoid BITCASTs of VSPLAT).
Clean up the previous selection of BITCAST/VSPLAT.

llvm-svn: 330471
This commit is contained in:
Krzysztof Parzyszek 2018-04-20 19:38:37 +00:00
parent aadbabc070
commit 41a24b7b13
10 changed files with 169 additions and 71 deletions

View File

@ -758,22 +758,6 @@ void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
ReplaceNode(N, R); ReplaceNode(N, R);
} }
/// Select a BITCAST node.
///
/// When both the operand and the result are vectors of equal total bit
/// width and neither has i1 elements, the node is dropped: every use of its
/// result is redirected to the operand and the node is removed from the DAG.
/// Every other bitcast is passed to SelectCode.
void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) {
  SDValue Src = N->getOperand(0);
  EVT FromTy = Src.getValueType();
  EVT ToTy = N->getValueType(0);

  // The checks run left to right and short-circuit, so the element type is
  // only queried once both types are known to be vectors.
  bool IsPlainVectorCast =
      FromTy.isVector() && ToTy.isVector() &&
      FromTy.getVectorElementType() != MVT::i1 &&
      ToTy.getVectorElementType() != MVT::i1 &&
      FromTy.getSizeInBits() == ToTy.getSizeInBits();

  if (IsPlainVectorCast) {
    // Forward the operand to all users, then delete the bitcast itself.
    ReplaceUses(SDValue(N, 0), Src);
    CurDAG->RemoveDeadNode(N);
    return;
  }

  SelectCode(N);
}
void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) { void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) {
MVT ResTy = N->getValueType(0).getSimpleVT(); MVT ResTy = N->getValueType(0).getSimpleVT();
if (HST->isHVXVectorType(ResTy, true)) if (HST->isHVXVectorType(ResTy, true))
@ -882,7 +866,6 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant: return SelectConstant(N); case ISD::Constant: return SelectConstant(N);
case ISD::ConstantFP: return SelectConstantFP(N); case ISD::ConstantFP: return SelectConstantFP(N);
case ISD::FrameIndex: return SelectFrameIndex(N); case ISD::FrameIndex: return SelectFrameIndex(N);
case ISD::BITCAST: return SelectBitcast(N);
case ISD::SHL: return SelectSHL(N); case ISD::SHL: return SelectSHL(N);
case ISD::LOAD: return SelectLoad(N); case ISD::LOAD: return SelectLoad(N);
case ISD::STORE: return SelectStore(N); case ISD::STORE: return SelectStore(N);

View File

@ -102,7 +102,6 @@ public:
void SelectIntrinsicWOChain(SDNode *N); void SelectIntrinsicWOChain(SDNode *N);
void SelectConstant(SDNode *N); void SelectConstant(SDNode *N);
void SelectConstantFP(SDNode *N); void SelectConstantFP(SDNode *N);
void SelectBitcast(SDNode *N);
void SelectV65Gather(SDNode *N); void SelectV65Gather(SDNode *N);
void SelectV65GatherPred(SDNode *N); void SelectV65GatherPred(SDNode *N);
void SelectHVXDualOutput(SDNode *N); void SelectHVXDualOutput(SDNode *N);

View File

@ -919,36 +919,40 @@ static bool isPermutation(ArrayRef<int> Mask) {
} }
bool HvxSelector::selectVectorConstants(SDNode *N) { bool HvxSelector::selectVectorConstants(SDNode *N) {
// Constant vectors are generated as loads from constant pools or // Constant vectors are generated as loads from constant pools or as
// as VSPLATs of a constant value. // splats of a constant value. Since they are generated during the
// Since they are generated during the selection process, the main // selection process, the main selection algorithm is not aware of them.
// selection algorithm is not aware of them. Select them directly // Select them directly here.
// here.
SmallVector<SDNode*,4> Nodes; SmallVector<SDNode*,4> Nodes;
SetVector<SDNode*> WorkQ; SetVector<SDNode*> WorkQ;
// The one-use test for VSPLATW's operand may fail due to dead nodes
// left over in the DAG.
DAG.RemoveDeadNodes();
// The DAG can change (due to CSE) during selection, so cache all the // The DAG can change (due to CSE) during selection, so cache all the
// unselected nodes first to avoid traversing a mutating DAG. // unselected nodes first to avoid traversing a mutating DAG.
auto IsNodeToSelect = [] (SDNode *N) { auto IsNodeToSelect = [] (SDNode *N) {
if (N->isMachineOpcode()) if (N->isMachineOpcode())
return false; return false;
unsigned Opc = N->getOpcode(); switch (N->getOpcode()) {
if (Opc == HexagonISD::VSPLAT || Opc == HexagonISD::VZERO) case HexagonISD::VZERO:
return true; case HexagonISD::VSPLATW:
if (Opc == ISD::BITCAST) {
// Only select bitcasts of VSPLATs.
if (N->getOperand(0).getOpcode() == HexagonISD::VSPLAT)
return true; return true;
case ISD::LOAD: {
SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
unsigned AddrOpc = Addr.getOpcode();
if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP)
if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool)
return true;
}
break;
} }
if (Opc == ISD::LOAD) { // Make sure to select the operand of VSPLATW.
SDValue Addr = cast<LoadSDNode>(N)->getBasePtr(); bool IsSplatOp = N->hasOneUse() &&
unsigned AddrOpc = Addr.getOpcode(); N->use_begin()->getOpcode() == HexagonISD::VSPLATW;
if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) return IsSplatOp;
if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool)
return true;
}
return false;
}; };
WorkQ.insert(N); WorkQ.insert(N);

View File

@ -1731,6 +1731,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VROR: return "HexagonISD::VROR"; case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::VZERO: return "HexagonISD::VZERO"; case HexagonISD::VZERO: return "HexagonISD::VZERO";
case HexagonISD::VSPLATW: return "HexagonISD::VSPLATW";
case HexagonISD::D2P: return "HexagonISD::D2P"; case HexagonISD::D2P: return "HexagonISD::D2P";
case HexagonISD::P2D: return "HexagonISD::P2D"; case HexagonISD::P2D: return "HexagonISD::P2D";
case HexagonISD::V2Q: return "HexagonISD::V2Q"; case HexagonISD::V2Q: return "HexagonISD::V2Q";

View File

@ -51,7 +51,8 @@ namespace HexagonISD {
CP, // Constant pool. CP, // Constant pool.
COMBINE, COMBINE,
VSPLAT, VSPLAT, // Generic splat, selection depends on argument/return
// types.
VASL, VASL,
VASR, VASR,
VLSR, VLSR,
@ -77,6 +78,7 @@ namespace HexagonISD {
QTRUE, QTRUE,
QFALSE, QFALSE,
VZERO, VZERO,
VSPLATW, // HVX splat of a 32-bit word with an arbitrary result type.
TYPECAST, // No-op that's used to convert between different legal TYPECAST, // No-op that's used to convert between different legal
// types in a register. // types in a register.
VALIGN, // Align two vectors (in Op0, Op1) to one that would have VALIGN, // Align two vectors (in Op0, Op1) to one that would have

View File

@ -367,9 +367,7 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode()); auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
if (IdxN && IdxN->isNullValue()) if (IdxN && IdxN->isNullValue())
return getZero(dl, VecTy, DAG); return getZero(dl, VecTy, DAG);
MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4); return DAG.getNode(HexagonISD::VSPLATW, dl, VecTy, SplatV);
SDValue SV = DAG.getNode(HexagonISD::VSPLAT, dl, WordTy, SplatV);
return DAG.getBitcast(VecTy, SV);
} }
// Delay recognizing constant vectors until here, so that we can generate // Delay recognizing constant vectors until here, so that we can generate

View File

@ -9,7 +9,10 @@ def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;
def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
[SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
def SDTHexagonVSPLATW: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def HexagonVSPLATW: SDNode<"HexagonISD::VSPLATW", SDTHexagonVSPLATW>;
def HwLen2: SDNodeXForm<imm, [{ def HwLen2: SDNodeXForm<imm, [{
const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget()); const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
@ -157,6 +160,26 @@ let Predicates = [UseHVX] in {
defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, HVI32>; defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, HVI32>;
} }
// Bitcasts between same-size vector types are no-ops, except for the
// actual type change.
//
// Bitcast<ResTy, InpTy, RC> matches a bitconvert of an InpTy value held in
// register class RC to ResTy, and selects it to the same $Val retyped as
// ResTy. The output pattern names no instruction.
class Bitcast<ValueType ResTy, ValueType InpTy, RegisterClass RC>
: Pat<(ResTy (bitconvert (InpTy RC:$Val))), (ResTy RC:$Val)>;
let Predicates = [UseHVX] in {
// HvxVR: each ordered pair of distinct types among VecI8, VecI16, VecI32.
def: Bitcast<VecI8, VecI16, HvxVR>;
def: Bitcast<VecI8, VecI32, HvxVR>;
def: Bitcast<VecI16, VecI8, HvxVR>;
def: Bitcast<VecI16, VecI32, HvxVR>;
def: Bitcast<VecI32, VecI8, HvxVR>;
def: Bitcast<VecI32, VecI16, HvxVR>;
// HvxWR: each ordered pair of distinct types among VecPI8, VecPI16, VecPI32.
def: Bitcast<VecPI8, VecPI16, HvxWR>;
def: Bitcast<VecPI8, VecPI32, HvxWR>;
def: Bitcast<VecPI16, VecPI8, HvxWR>;
def: Bitcast<VecPI16, VecPI32, HvxWR>;
def: Bitcast<VecPI32, VecPI8, HvxWR>;
def: Bitcast<VecPI32, VecPI16, HvxWR>;
}
let Predicates = [UseHVX] in { let Predicates = [UseHVX] in {
def: Pat<(VecI8 vzero), (V6_vd0)>; def: Pat<(VecI8 vzero), (V6_vd0)>;
@ -190,38 +213,44 @@ let Predicates = [UseHVX] in {
(V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt), def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
(V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
}
def Vsplatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>;
def Vsplatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>;
def Vsplatiw: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>;
def Vsplatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>;
def Vsplatrh: OutPatFrag<(ops node:$Rs),
(V6_lvsplatw (A2_combine_ll $Rs, $Rs))>;
def Vsplatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;
def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>;
let Predicates = [UseHVX] in {
let AddedComplexity = 10 in { let AddedComplexity = 10 in {
def: Pat<(VecI8 (HexagonVSPLAT u8_0ImmPred:$V)), def: Pat<(VecI8 (HexagonVSPLAT u8_0ImmPred:$V)), (Vsplatib $V)>;
(V6_lvsplatw (ToI32 (SplatB $V)))>; def: Pat<(VecI16 (HexagonVSPLAT u16_0ImmPred:$V)), (Vsplatih $V)>;
def: Pat<(VecI16 (HexagonVSPLAT u16_0ImmPred:$V)), def: Pat<(VecI32 (HexagonVSPLAT anyimm:$V)), (Vsplatiw $V)>;
(V6_lvsplatw (ToI32 (SplatH $V)))>; def: Pat<(VecPI8 (HexagonVSPLAT u8_0ImmPred:$V)), (Rep (Vsplatib $V))>;
def: Pat<(VecI32 (HexagonVSPLAT anyimm:$V)), def: Pat<(VecPI16 (HexagonVSPLAT u16_0ImmPred:$V)), (Rep (Vsplatih $V))>;
(V6_lvsplatw (ToI32 $V))>; def: Pat<(VecPI32 (HexagonVSPLAT anyimm:$V)), (Rep (Vsplatiw $V))>;
def: Pat<(VecPI8 (HexagonVSPLAT u8_0ImmPred:$V)),
(Combinev (V6_lvsplatw (ToI32 (SplatB $V))),
(V6_lvsplatw (ToI32 (SplatB $V))))>;
def: Pat<(VecPI16 (HexagonVSPLAT u16_0ImmPred:$V)),
(Combinev (V6_lvsplatw (ToI32 (SplatH $V))),
(V6_lvsplatw (ToI32 (SplatH $V))))>;
def: Pat<(VecPI32 (HexagonVSPLAT anyimm:$V)),
(Combinev (V6_lvsplatw (ToI32 $V)), (V6_lvsplatw (ToI32 $V)))>;
} }
def: Pat<(VecI8 (HexagonVSPLAT I32:$Rs)), def: Pat<(VecI8 (HexagonVSPLAT I32:$Rs)), (Vsplatrb $Rs)>;
(V6_lvsplatw (S2_vsplatrb I32:$Rs))>; def: Pat<(VecI16 (HexagonVSPLAT I32:$Rs)), (Vsplatrh $Rs)>;
def: Pat<(VecI16 (HexagonVSPLAT I32:$Rs)), def: Pat<(VecI32 (HexagonVSPLAT I32:$Rs)), (Vsplatrw $Rs)>;
(V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs))>; def: Pat<(VecPI8 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrb $Rs))>;
def: Pat<(VecI32 (HexagonVSPLAT I32:$Rs)), def: Pat<(VecPI16 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrh $Rs))>;
(V6_lvsplatw I32:$Rs)>; def: Pat<(VecPI32 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrw $Rs))>;
def: Pat<(VecPI8 (HexagonVSPLAT I32:$Rs)),
(Combinev (V6_lvsplatw (S2_vsplatrb I32:$Rs)),
(V6_lvsplatw (S2_vsplatrb I32:$Rs)))>;
def: Pat<(VecPI16 (HexagonVSPLAT I32:$Rs)),
(Combinev (V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs)),
(V6_lvsplatw (A2_combine_ll I32:$Rs, I32:$Rs)))>;
def: Pat<(VecPI32 (HexagonVSPLAT I32:$Rs)),
(Combinev (V6_lvsplatw I32:$Rs), (V6_lvsplatw I32:$Rs))>;
def: Pat<(VecI8 (HexagonVSPLATW I32:$Rs)), (V6_lvsplatw I32:$Rs)>;
def: Pat<(VecI16 (HexagonVSPLATW I32:$Rs)), (V6_lvsplatw I32:$Rs)>;
def: Pat<(VecI32 (HexagonVSPLATW I32:$Rs)), (V6_lvsplatw I32:$Rs)>;
def: Pat<(VecPI8 (HexagonVSPLATW I32:$Rs)), (Rep (V6_lvsplatw I32:$Rs))>;
def: Pat<(VecPI16 (HexagonVSPLATW I32:$Rs)), (Rep (V6_lvsplatw I32:$Rs))>;
def: Pat<(VecPI32 (HexagonVSPLATW I32:$Rs)), (Rep (V6_lvsplatw I32:$Rs))>;
}
let Predicates = [UseHVX] in {
def: Pat<(add HVI8:$Vs, HVI8:$Vt), (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(add HVI8:$Vs, HVI8:$Vt), (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(add HVI16:$Vs, HVI16:$Vt), (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(add HVI16:$Vs, HVI16:$Vt), (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(add HVI32:$Vs, HVI32:$Vt), (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(add HVI32:$Vs, HVI32:$Vt), (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>;
@ -237,8 +266,14 @@ let Predicates = [UseHVX] in {
def: Pat<(sub HWI32:$Vs, HWI32:$Vt), (V6_vsubw_dv HvxWR:$Vs, HvxWR:$Vt)>; def: Pat<(sub HWI32:$Vs, HWI32:$Vt), (V6_vsubw_dv HvxWR:$Vs, HvxWR:$Vt)>;
def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(and HVI16:$Vs, HVI16:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(and HVI32:$Vs, HVI32:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(or HVI16:$Vs, HVI16:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(or HVI32:$Vs, HVI32:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>; def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(xor HVI16:$Vs, HVI16:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(xor HVI32:$Vs, HVI32:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>;
def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt), def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;

View File

@ -28,7 +28,7 @@ using namespace llvm;
#define DEBUG_TYPE "hexagontti" #define DEBUG_TYPE "hexagontti"
static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false), static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(true),
cl::Hidden, cl::desc("Enable loop vectorizer for HVX")); cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables", static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",

View File

@ -0,0 +1,50 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; This testcase exposed a problem with a previous handling of selecting
; constant vectors (for vdelta). Originally a bitcast of a vsplat was
; created (both being ISD, not machine nodes). Selection of vsplat relies
; on its return type, and there was no way to get these nodes to be
; selected in the right order, without getting the main selection algorithm
; confused.
; Make sure this compiles successfully.
; CHECK: call f1
target triple = "hexagon"
%s.0 = type { %s.1 }
%s.1 = type { i32, i8* }
%s.2 = type { i8, i8, [16 x i8], i8, [16 x i8] }
; Function Attrs: nounwind
define dso_local zeroext i8 @f0(i8 zeroext %a0, %s.2* nocapture readonly %a1, i8 signext %a2) local_unnamed_addr #0 {
b0:
br i1 undef, label %b2, label %b1
b1: ; preds = %b0
%v0 = load <64 x i8>, <64 x i8>* undef, align 1
%v1 = icmp ult <64 x i8> %v0, <i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52, i8 52>
%v2 = xor <64 x i1> %v1, zeroinitializer
%v3 = select <64 x i1> %v2, <64 x i32> undef, <64 x i32> zeroinitializer
%v4 = select <64 x i1> zeroinitializer, <64 x i32> <i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000, i32 304000>, <64 x i32> %v3
%v5 = add <64 x i32> %v4, zeroinitializer
br label %b2
b2: ; preds = %b1, %b0
%v6 = phi <64 x i32> [ undef, %b0 ], [ %v5, %b1 ]
%v7 = add <64 x i32> %v6, undef
%v8 = add <64 x i32> %v7, undef
%v9 = add <64 x i32> %v8, undef
%v10 = add <64 x i32> %v9, undef
%v11 = add <64 x i32> %v10, undef
%v12 = add <64 x i32> %v11, undef
%v13 = extractelement <64 x i32> %v12, i32 0
tail call void @f1(%s.0* null, i32 undef, i32 undef, i32 %v13, i32 undef) #2
unreachable
}
declare dso_local void @f1(%s.0*, i32, i32, i32, i32) local_unnamed_addr #1
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
attributes #1 = { "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }
attributes #2 = { nounwind }

View File

@ -0,0 +1,26 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; Check that this compiles successfully.
; CHECK: vsplat
target triple = "hexagon"
; Function Attrs: norecurse nounwind
define dso_local i32 @f0(i32* nocapture %a0, i32* nocapture readonly %a1, i32* nocapture readonly %a2, i32 %a3) local_unnamed_addr #0 {
b0:
%v0 = insertelement <16 x i32> undef, i32 %a3, i32 0
%v1 = shufflevector <16 x i32> %v0, <16 x i32> undef, <16 x i32> zeroinitializer
%v2 = add i32 %a3, 64
%v3 = add <16 x i32> %v1, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
%v4 = sdiv <16 x i32> %v3, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
%v5 = add nsw <16 x i32> %v4, <i32 1000, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%v6 = shufflevector <16 x i32> %v5, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%v7 = add <16 x i32> %v5, %v6
%v8 = extractelement <16 x i32> %v7, i32 0
%v9 = add nsw i32 %v2, 1
%v10 = sdiv i32 %v9, 23
%v11 = add i32 %v8, %v10
ret i32 %v11
}
attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }