forked from OSchip/llvm-project
Fix UMULO support for 2x register width to allow the full
range without requiring a call to a new mulo<mode> libcall that we'd have to create. Finishes the rest of rdar://9090077 and rdar://9210061. llvm-svn: 133318
This commit is contained in:
parent
4e4294bdee
commit
e4a1266a9a
|
@ -2160,6 +2160,27 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
|
|||
const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
|
||||
DebugLoc dl = N->getDebugLoc();
|
||||
|
||||
// A divide for UMULO should be faster than a function call.
|
||||
if (N->getOpcode() == ISD::UMULO) {
|
||||
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
|
||||
SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
|
||||
SplitInteger(MUL, Lo, Hi);
|
||||
|
||||
// A divide for UMULO will be faster than a function call. Select to
|
||||
// make sure we aren't using 0.
|
||||
SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
|
||||
RHS, DAG.getConstant(0, VT), ISD::SETNE);
|
||||
SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
|
||||
DAG.getConstant(1, VT), RHS);
|
||||
SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
|
||||
SDValue Overflow;
|
||||
Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
|
||||
ReplaceValueWith(SDValue(N, 1), Overflow);
|
||||
return;
|
||||
}
|
||||
|
||||
// Replace this with a libcall that will check overflow.
|
||||
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
|
||||
if (VT == MVT::i32)
|
||||
|
|
|
@ -2,9 +2,8 @@
|
|||
%0 = type { i64, i64 }
|
||||
%1 = type { i128, i1 }
|
||||
|
||||
@.str = private unnamed_addr constant [11 x i8] c"%llx %llx\0A\00", align 1
|
||||
|
||||
define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
|
||||
; CHECK: x
|
||||
entry:
|
||||
%tmp16 = zext i64 %a.coerce0 to i128
|
||||
%tmp11 = zext i64 %a.coerce1 to i128
|
||||
|
@ -33,6 +32,50 @@ nooverflow: ; preds = %entry
|
|||
ret %0 %tmp24
|
||||
}
|
||||
|
||||
; foo: regression test for i128 unsigned multiply-with-overflow.
; Each i128 operand arrives as two i64 arguments (%a.coerce0/%a.coerce1 and
; %b.coerce0/%b.coerce1).  The function rebuilds the two i128 values in
; memory, multiplies them with @llvm.umul.with.overflow.i128, traps if the
; overflow bit is set, and otherwise returns the product as a %0
; ({ i64, i64 }, per the type declared at the top of this file).
define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
|
||||
entry:
|
||||
; CHECK: foo
|
||||
%retval = alloca i128, align 16
|
||||
%coerce = alloca i128, align 16
|
||||
%a.addr = alloca i128, align 16
|
||||
%coerce1 = alloca i128, align 16
|
||||
%b.addr = alloca i128, align 16
|
||||
; Rebuild operand a: view %coerce as a { i64, i64 }, store both halves,
; then reload the slot as a single i128 and spill it to %a.addr.
%0 = bitcast i128* %coerce to %0*
|
||||
%1 = getelementptr %0* %0, i32 0, i32 0
|
||||
store i64 %a.coerce0, i64* %1
|
||||
%2 = getelementptr %0* %0, i32 0, i32 1
|
||||
store i64 %a.coerce1, i64* %2
|
||||
%a = load i128* %coerce, align 16
|
||||
store i128 %a, i128* %a.addr, align 16
|
||||
; Rebuild operand b the same way through %coerce1.
%3 = bitcast i128* %coerce1 to %0*
|
||||
%4 = getelementptr %0* %3, i32 0, i32 0
|
||||
store i64 %b.coerce0, i64* %4
|
||||
%5 = getelementptr %0* %3, i32 0, i32 1
|
||||
store i64 %b.coerce1, i64* %5
|
||||
%b = load i128* %coerce1, align 16
|
||||
store i128 %b, i128* %b.addr, align 16
|
||||
%tmp = load i128* %a.addr, align 16
|
||||
%tmp2 = load i128* %b.addr, align 16
|
||||
; The operation under test; the result is a %1, i.e. { i128, i1 }.
%6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2)
|
||||
; The two directives below expect a cmov and then a reference to a symbol
; containing "divti3" in the generated code.  NOTE(review): presumably these
; are the zero-divisor select and the 128-bit divide emitted by the UMULO
; expansion -- confirm against the legalizer.
; CHECK: cmov
|
||||
; CHECK: divti3
|
||||
; Field 0 is the i128 product, field 1 the i1 overflow flag.
%7 = extractvalue %1 %6, 0
|
||||
%8 = extractvalue %1 %6, 1
|
||||
br i1 %8, label %overflow, label %nooverflow
|
||||
|
||||
; Overflow path: trap, never returns.
overflow: ; preds = %entry
|
||||
call void @llvm.trap()
|
||||
unreachable
|
||||
|
||||
; No-overflow path: store the product and return it reinterpreted as %0.
nooverflow: ; preds = %entry
|
||||
store i128 %7, i128* %retval
|
||||
%9 = bitcast i128* %retval to %0*
|
||||
%10 = load %0* %9, align 1
|
||||
ret %0 %10
|
||||
}
|
||||
|
||||
declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone
|
||||
|
||||
declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
|
||||
|
||||
declare void @llvm.trap() nounwind
|
||||
|
|
Loading…
Reference in New Issue