it turns out that when ".with.overflow" intrinsics were added to the X86

backend that they were all implemented except umul.  This one fell back
to the default implementation that did a hi/lo multiply and compared the
top.  Fix this to check the overflow flag that the 'mul' instruction
sets, so we can avoid an explicit test.  Now we compile:

void *func(long count) {
      return new int[count];
}

into:

__Z4funcl:                              ## @_Z4funcl
	movl	$4, %ecx                ## encoding: [0xb9,0x04,0x00,0x00,0x00]
	movq	%rdi, %rax              ## encoding: [0x48,0x89,0xf8]
	mulq	%rcx                    ## encoding: [0x48,0xf7,0xe1]
	seto	%cl                     ## encoding: [0x0f,0x90,0xc1]
	testb	%cl, %cl                ## encoding: [0x84,0xc9]
	movq	$-1, %rdi               ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
	cmoveq	%rax, %rdi              ## encoding: [0x48,0x0f,0x44,0xf8]
	jmp	__Znam                  ## TAILCALL

instead of:

__Z4funcl:                              ## @_Z4funcl
	movl	$4, %ecx                ## encoding: [0xb9,0x04,0x00,0x00,0x00]
	movq	%rdi, %rax              ## encoding: [0x48,0x89,0xf8]
	mulq	%rcx                    ## encoding: [0x48,0xf7,0xe1]
	testq	%rdx, %rdx              ## encoding: [0x48,0x85,0xd2]
	movq	$-1, %rdi               ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
	cmoveq	%rax, %rdi              ## encoding: [0x48,0x0f,0x44,0xf8]
	jmp	__Znam                  ## TAILCALL

Other than the silly seto+test, this is using the o bit directly, so it's going in the right
direction.

llvm-svn: 120935
This commit is contained in:
Chris Lattner 2010-12-05 07:30:36 +00:00
parent 183ddd8ed3
commit 364bb0a081
6 changed files with 73 additions and 21 deletions

View File

@ -1600,7 +1600,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal;
break;
}
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
unsigned LoReg, HiReg;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; Opc = X86::MUL8r; break;
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; Opc = X86::MUL16r; break;
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; Opc = X86::MUL32r; break;
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; Opc = X86::MUL64r; break;
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
SDValue Ops[] = {N1, InFlag};
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
return NULL;
}
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI: {
SDValue N0 = Node->getOperand(0);
@ -1653,11 +1678,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
array_lengthof(Ops));
InFlag = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {
InFlag =
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag);
InFlag = SDValue(CNode, 0);
}
// Prevent use of AH in a REX instruction by referencing AX instead.
@ -1696,7 +1722,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
return NULL;
}

View File

@ -948,6 +948,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i32, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@ -961,6 +962,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
}
if (!Subtarget->is64Bit()) {
@ -7042,7 +7044,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return NewSetCC;
}
// Look for "(setcc) == / != 1" to avoid unncessary setcc.
// Look for "(setcc) == / != 1" to avoid unnecessary setcc.
if (Op0.getOpcode() == X86ISD::SETCC &&
Op1.getOpcode() == ISD::Constant &&
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
@ -8446,8 +8448,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue RHS = N->getOperand(1);
unsigned BaseOp = 0;
unsigned Cond = 0;
DebugLoc dl = Op.getDebugLoc();
DebugLoc DL = Op.getDebugLoc();
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
@ -8486,19 +8487,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
BaseOp = X86ISD::SMUL;
Cond = X86::COND_O;
break;
case ISD::UMULO:
BaseOp = X86ISD::UMUL;
Cond = X86::COND_B;
break;
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
MVT::i32);
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
SDValue SetCC =
DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
DAG.getConstant(X86::COND_O, MVT::i32),
SDValue(Sum.getNode(), 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
return Sum;
}
}
// Also sets EFLAGS.
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
SDValue SetCC =
DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
DAG.getConstant(Cond, MVT::i32),
SDValue(Sum.getNode(), 1));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
return Sum;

View File

@ -200,9 +200,11 @@ namespace llvm {
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
// ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, SMUL, UMUL,
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, SMUL,
INC, DEC, OR, XOR, AND,
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,

View File

@ -60,11 +60,12 @@ def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src",
[]>; // EAX,EDX = EAX*GR32
"mul{l}\t$src", // EAX,EDX = EAX*GR32
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
"mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
"mul{q}\t$src", // RAX,RDX = RAX*GR64
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),

View File

@ -35,6 +35,12 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>, SDTCisVT<1, i32>]>;
// RES1, RES2, FLAGS = op LHS, RHS
def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>, SDTCisVT<1, i32>]>;
def SDTX86BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
@ -188,7 +194,7 @@ def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
[SDNPCommutative]>;
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;

View File

@ -1,8 +1,14 @@
; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
; RUN: llc < %s -march=x86 | FileCheck %s
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
define i1 @a(i32 %x) zeroext nounwind {
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
%obil = extractvalue {i32, i1} %res, 1
ret i1 %obil
; CHECK: a:
; CHECK: mull
; CHECK: seto %al
; CHECK: movzbl %al, %eax
; CHECK: ret
}