[X86] Directly form VBROADCAST_LOAD in lowerShuffleAsBroadcast on AVX targets.
If we would emit a VBROADCAST node, we can instead directly emit a VBROADCAST_LOAD. This allows us to get rid of the special case that uses an f64 load on 32-bit targets for vXi64. I believe there is more cleanup we can do in this function later, but I'll do that in follow-ups.
parent f24d90c0a6
commit 06de426426
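For illustration only (not taken from the commit or its tests): the kind of source pattern this lowering affects is a splat of a 64-bit scalar loaded from memory. A minimal sketch, assuming a GCC/Clang-style compiler with AVX2 enabled (e.g. -mavx2); with this change, the broadcast-of-a-loaded-scalar shuffle can be lowered directly to a broadcast load (X86ISD::VBROADCAST_LOAD) even on 32-bit targets, without first loading the value as f64 and bitcasting.

// Hypothetical example; splat_i64 and its use of intrinsics are illustrative,
// not part of the commit.
#include <immintrin.h>

// Broadcast a 64-bit integer loaded from memory into all four i64 lanes of a
// 256-bit vector. The shuffle implementing this splat is the pattern that
// lowerShuffleAsBroadcast can now reduce to a single broadcasted scalar load.
__m256i splat_i64(const long long *p) {
  return _mm256_set1_epi64x(*p);
}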
@@ -12980,14 +12980,6 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     // is expected to be a win for code size, register pressure, and possibly
     // uops even if the original vector load is not eliminated.
 
-    // 32-bit targets need to load i64 as a f64 and then bitcast the result.
-    if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) {
-      BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
-      Opcode = (BroadcastVT.is128BitVector() && !Subtarget.hasAVX2())
-                   ? X86ISD::MOVDDUP
-                   : Opcode;
-    }
-
     // Reduce the vector load and shuffle to a broadcasted scalar load.
     LoadSDNode *Ld = cast<LoadSDNode>(V);
     SDValue BaseAddr = Ld->getOperand(1);
@@ -12995,6 +12987,21 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
     unsigned Offset = BroadcastIdx * SVT.getStoreSize();
     assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
     SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+
+    // Directly form VBROADCAST_LOAD if we're using VBROADCAST opcode rather
+    // than MOVDDUP.
+    // FIXME: Should we add VBROADCAST_LOAD isel patterns for pre-AVX?
+    if (Opcode == X86ISD::VBROADCAST) {
+      SDVTList Tys = DAG.getVTList(BroadcastVT, MVT::Other);
+      SDValue Ops[] = {Ld->getChain(), NewAddr};
+      V = DAG.getMemIntrinsicNode(
+          X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT,
+          DAG.getMachineFunction().getMachineMemOperand(
+              Ld->getMemOperand(), Offset, SVT.getStoreSize()));
+      DAG.makeEquivalentMemoryOrdering(Ld, V);
+      return DAG.getBitcast(VT, V);
+    }
+    assert(SVT == MVT::f64 && "Unexpected VT!");
     V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                     DAG.getMachineFunction().getMachineMemOperand(
                         Ld->getMemOperand(), Offset, SVT.getStoreSize()));