The previous fix of widening divides that trap was too fragile as it depends on custom

lowering and requires that certain types exist in ValueTypes.h.  Modified widening to
check if an op can trap and if so, the widening algorithm will apply only the op on
the defined elements.  It is safer to do this in widening because the optimizer can't
guarantee removing unused ops in some cases.

llvm-svn: 95823
This commit is contained in:
Mon P Wang 2010-02-10 23:37:45 +00:00
parent 989908fbac
commit 5b77f0dac1
5 changed files with 141 additions and 24 deletions

View File

@ -346,6 +346,11 @@ public:
return true;
}
/// canOpTrap - Returns true if the operation can trap for the value type.
/// VT must be a legal type. By default, we optimistically assume most
/// operations don't trap except for divide and remainder.
virtual bool canOpTrap(unsigned Op, EVT VT) const;
/// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. This is
/// used by Targets can use this to indicate if there is a suitable
/// VECTOR_SHUFFLE that can be used to replace a VAND with a constant

View File

@ -1242,10 +1242,96 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
// Binary op widening.
unsigned Opcode = N->getOpcode();
DebugLoc dl = N->getDebugLoc();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp1, InOp2);
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
// Operation doesn't trap so just widen as normal.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
} else if (NumElts == 1) {
// No legal vector version so unroll the vector operation and then widen.
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
} else {
// Since the operation can trap, apply operation on the original vector.
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
SmallVector<SDValue, 16> ConcatOps(CurNumElts);
unsigned ConcatEnd = 0; // Current ConcatOps index.
unsigned Idx = 0; // Current Idx into input vectors.
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
DAG.getIntPtrConstant(Idx));
SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
DAG.getIntPtrConstant(Idx));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
Idx += NumElts;
CurNumElts -= NumElts;
}
EVT PrevVecVT = VT;
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
} while (!TLI.isTypeLegal(VT) && NumElts != 1);
if (NumElts == 1) {
// Since we are using concat vector, build a vector from the scalar ops.
SDValue VecOp = DAG.getUNDEF(PrevVecVT);
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp1, DAG.getIntPtrConstant(Idx));
SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp2, DAG.getIntPtrConstant(Idx));
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp,
DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2),
DAG.getIntPtrConstant(i));
}
CurNumElts = 0;
ConcatOps[ConcatEnd++] = VecOp;
}
}
// Check to see if we have a single operation with the widen type.
if (ConcatEnd == 1) {
VT = ConcatOps[0].getValueType();
if (VT == WidenVT)
return ConcatOps[0];
}
// Rebuild vector to one with the widen type
Idx = ConcatEnd - 1;
while (Idx != 0) {
VT = ConcatOps[Idx--].getValueType();
while (Idx != 0 && ConcatOps[Idx].getValueType() == VT)
--Idx;
if (Idx != 0) {
VT = ConcatOps[Idx].getValueType();
ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
&ConcatOps[Idx+1], ConcatEnd - Idx - 1);
ConcatEnd = Idx + 2;
}
}
unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements();
if (NumOps != ConcatEnd ) {
SDValue UndefVal = DAG.getUNDEF(VT);
for (unsigned j = ConcatEnd; j < NumOps; ++j)
ConcatOps[j] = UndefVal;
}
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
}
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {

View File

@ -540,6 +540,24 @@ TargetLowering::~TargetLowering() {
delete &TLOF;
}
/// canOpTrap - Returns true if the operation can trap for the value type.
/// VT must be a legal type.
bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
assert(isTypeLegal(VT));
switch (Op) {
default:
return false;
case ISD::FDIV:
case ISD::FREM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
return true;
}
}
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned &NumIntermediates,
EVT &RegisterVT,

View File

@ -1001,19 +1001,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
computeRegisterProperties();
// Divide and reminder operations have no vector equivalent and can
// trap. Do a custom widening for these operations in which we never
// generate more divides/remainder than the original vector width.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
if (!isTypeLegal((MVT::SimpleValueType)VT)) {
setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom);
setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom);
setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom);
setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom);
}
}
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are in optimizing for size mode.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
@ -7572,14 +7559,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(edx.getValue(1));
return;
}
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM: {
EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()));
return;
}
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");

View File

@ -152,3 +152,32 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
%rem.r = urem <5 x i64> %num, %rem
ret <5 x i64> %rem.r
}
define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
; CHECK: idivl
; CHECK: idivl
; CHECK: idivl
; CHECK-NOT: idivl
; CHECK: ret
entry:
%cmp13 = icmp sgt i32 %n, 0
br i1 %cmp13, label %bb.nph, label %for.end
bb.nph:
br label %for.body
for.body:
%i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ]
%arrayidx11 = getelementptr <3 x i32>* %dest, i32 %i.014
%tmp4 = load <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
%arrayidx7 = getelementptr inbounds <3 x i32>* %old, i32 %i.014
%tmp8 = load <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
%div = sdiv <3 x i32> %tmp4, %tmp8
store <3 x i32> %div, <3 x i32>* %arrayidx11
%inc = add nsw i32 %i.014, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}