forked from OSchip/llvm-project
Fix PR15267
- When extloading from a vector with non-byte-addressable elements, e.g. <4 x i1>, the current logic breaks. Extend it to handle element types that are not byte-addressable by loading all bytes, then bit-extracting and packing each element. llvm-svn: 175642
This commit is contained in:
parent
a018cfd10c
commit
7fb39669ef
|
@ -363,30 +363,135 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
|
|||
EVT SrcVT = LD->getMemoryVT();
|
||||
ISD::LoadExtType ExtType = LD->getExtensionType();
|
||||
|
||||
SmallVector<SDValue, 8> LoadVals;
|
||||
SmallVector<SDValue, 8> Vals;
|
||||
SmallVector<SDValue, 8> LoadChains;
|
||||
unsigned NumElem = SrcVT.getVectorNumElements();
|
||||
unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
|
||||
|
||||
for (unsigned Idx=0; Idx<NumElem; Idx++) {
|
||||
SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
|
||||
Op.getNode()->getValueType(0).getScalarType(),
|
||||
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
|
||||
SrcVT.getScalarType(),
|
||||
LD->isVolatile(), LD->isNonTemporal(),
|
||||
LD->getAlignment());
|
||||
EVT SrcEltVT = SrcVT.getScalarType();
|
||||
EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType();
|
||||
|
||||
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
|
||||
DAG.getIntPtrConstant(Stride));
|
||||
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
|
||||
// When the elements of a vector are not byte-addressable, we cannot directly
|
||||
// load each element by advancing the pointer, which can only address bytes.
|
||||
// Instead, we load all significant words, mask bits off, and concatenate
|
||||
// them to form each element. Finally, they are extended to destination
|
||||
// scalar type to build the destination vector.
|
||||
EVT WideVT = TLI.getPointerTy();
|
||||
|
||||
LoadVals.push_back(ScalarLoad.getValue(0));
|
||||
LoadChains.push_back(ScalarLoad.getValue(1));
|
||||
assert(WideVT.isRound() &&
|
||||
"Could not handle the sophisticated case when the widest integer is"
|
||||
" not power of 2.");
|
||||
assert(WideVT.bitsGE(SrcEltVT) &&
|
||||
"Type is not legalized?");
|
||||
|
||||
unsigned WideBytes = WideVT.getStoreSize();
|
||||
unsigned Offset = 0;
|
||||
unsigned RemainingBytes = SrcVT.getStoreSize();
|
||||
SmallVector<SDValue, 8> LoadVals;
|
||||
|
||||
while (RemainingBytes > 0) {
|
||||
SDValue ScalarLoad;
|
||||
unsigned LoadBytes = WideBytes;
|
||||
|
||||
if (RemainingBytes >= LoadBytes) {
|
||||
ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR,
|
||||
LD->getPointerInfo().getWithOffset(Offset),
|
||||
LD->isVolatile(), LD->isNonTemporal(),
|
||||
LD->isInvariant(), LD->getAlignment());
|
||||
} else {
|
||||
EVT LoadVT = WideVT;
|
||||
while (RemainingBytes < LoadBytes) {
|
||||
LoadBytes >>= 1; // Reduce the load size by half.
|
||||
LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
|
||||
}
|
||||
ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
|
||||
LD->getPointerInfo().getWithOffset(Offset),
|
||||
LoadVT, LD->isVolatile(),
|
||||
LD->isNonTemporal(), LD->getAlignment());
|
||||
}
|
||||
|
||||
RemainingBytes -= LoadBytes;
|
||||
Offset += LoadBytes;
|
||||
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
|
||||
DAG.getIntPtrConstant(LoadBytes));
|
||||
|
||||
LoadVals.push_back(ScalarLoad.getValue(0));
|
||||
LoadChains.push_back(ScalarLoad.getValue(1));
|
||||
}
|
||||
|
||||
// Extract bits, pack and extend/trunc them into destination type.
|
||||
unsigned SrcEltBits = SrcEltVT.getSizeInBits();
|
||||
SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT);
|
||||
|
||||
unsigned BitOffset = 0;
|
||||
unsigned WideIdx = 0;
|
||||
unsigned WideBits = WideVT.getSizeInBits();
|
||||
|
||||
for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
|
||||
SDValue Lo, Hi, ShAmt;
|
||||
|
||||
if (BitOffset < WideBits) {
|
||||
ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT));
|
||||
Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
|
||||
Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
|
||||
}
|
||||
|
||||
BitOffset += SrcEltBits;
|
||||
if (BitOffset >= WideBits) {
|
||||
WideIdx++;
|
||||
Offset -= WideBits;
|
||||
if (Offset > 0) {
|
||||
ShAmt = DAG.getConstant(SrcEltBits - Offset,
|
||||
TLI.getShiftAmountTy(WideVT));
|
||||
Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
|
||||
Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
|
||||
}
|
||||
}
|
||||
|
||||
if (Hi.getNode())
|
||||
Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
|
||||
|
||||
switch (ExtType) {
|
||||
default: llvm_unreachable("Unknown extended-load op!");
|
||||
case ISD::EXTLOAD:
|
||||
Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
|
||||
break;
|
||||
case ISD::ZEXTLOAD:
|
||||
Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
|
||||
break;
|
||||
case ISD::SEXTLOAD:
|
||||
ShAmt = DAG.getConstant(WideBits - SrcEltBits,
|
||||
TLI.getShiftAmountTy(WideVT));
|
||||
Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
|
||||
Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
|
||||
Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
|
||||
break;
|
||||
}
|
||||
Vals.push_back(Lo);
|
||||
}
|
||||
} else {
|
||||
unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
|
||||
|
||||
for (unsigned Idx=0; Idx<NumElem; Idx++) {
|
||||
SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
|
||||
Op.getNode()->getValueType(0).getScalarType(),
|
||||
Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
|
||||
SrcVT.getScalarType(),
|
||||
LD->isVolatile(), LD->isNonTemporal(),
|
||||
LD->getAlignment());
|
||||
|
||||
BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
|
||||
DAG.getIntPtrConstant(Stride));
|
||||
|
||||
Vals.push_back(ScalarLoad.getValue(0));
|
||||
LoadChains.push_back(ScalarLoad.getValue(1));
|
||||
}
|
||||
}
|
||||
|
||||
SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
&LoadChains[0], LoadChains.size());
|
||||
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
|
||||
Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
|
||||
Op.getNode()->getValueType(0), &Vals[0], Vals.size());
|
||||
|
||||
AddLegalizedOperand(Op.getValue(0), Value);
|
||||
AddLegalizedOperand(Op.getValue(1), NewChain);
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
; Test input: loads a <4 x i3> vector through %in (align 1) and returns it as-is.
define <4 x i3> @test1(<4 x i3>* %in) nounwind {
|
||||
%ret = load <4 x i3>* %in, align 1
|
||||
ret <4 x i3> %ret
|
||||
}
|
||||
|
||||
; CHECK: test1
|
||||
; CHECK: movzwl
|
||||
; CHECK: shrl $3
|
||||
; CHECK: andl $7
|
||||
; CHECK: andl $7
|
||||
; CHECK: vmovd
|
||||
; CHECK: pinsrd $1
|
||||
; CHECK: shrl $6
|
||||
; CHECK: andl $7
|
||||
; CHECK: pinsrd $2
|
||||
; CHECK: shrl $9
|
||||
; CHECK: andl $7
|
||||
; CHECK: pinsrd $3
|
||||
; CHECK: ret
|
||||
|
||||
; Test input: loads a <4 x i1> vector through %in (align 1) and returns it as-is.
define <4 x i1> @test2(<4 x i1>* %in) nounwind {
|
||||
%ret = load <4 x i1>* %in, align 1
|
||||
ret <4 x i1> %ret
|
||||
}
|
||||
|
||||
; CHECK: test2
|
||||
; CHECK: movzbl
|
||||
; CHECK: shrl
|
||||
; CHECK: andl $1
|
||||
; CHECK: andl $1
|
||||
; CHECK: vmovd
|
||||
; CHECK: pinsrd $1
|
||||
; CHECK: shrl $2
|
||||
; CHECK: andl $1
|
||||
; CHECK: pinsrd $2
|
||||
; CHECK: shrl $3
|
||||
; CHECK: andl $1
|
||||
; CHECK: pinsrd $3
|
||||
; CHECK: ret
|
||||
|
||||
; Test input: loads a <4 x i1> vector through %in (align 1), sign-extends each
; element to i64, and returns the resulting <4 x i64>.
define <4 x i64> @test3(<4 x i1>* %in) nounwind {
|
||||
%wide.load35 = load <4 x i1>* %in, align 1
|
||||
%sext = sext <4 x i1> %wide.load35 to <4 x i64>
|
||||
ret <4 x i64> %sext
|
||||
}
|
||||
|
||||
; CHECK: test3
|
||||
; CHECK: movzbl
|
||||
; CHECK: shrl
|
||||
; CHECK: andl $1
|
||||
; CHECK: andl $1
|
||||
; CHECK: vmovd
|
||||
; CHECK: pinsrd $1
|
||||
; CHECK: shrl $2
|
||||
; CHECK: andl $1
|
||||
; CHECK: pinsrd $2
|
||||
; CHECK: shrl $3
|
||||
; CHECK: andl $1
|
||||
; CHECK: pinsrd $3
|
||||
; CHECK: pslld
|
||||
; CHECK: psrad
|
||||
; CHECK: pmovsxdq
|
||||
; CHECK: pmovsxdq
|
||||
; CHECK: ret
|
Loading…
Reference in New Issue