forked from OSchip/llvm-project
It turns out that when the ".with.overflow" intrinsics were added to the X86
backend, they were all implemented except umul. This one fell back to the default implementation, which did a hi/lo multiply and tested the high half of the result. Fix this to check the overflow flag that the 'mul' instruction sets, so we can avoid an explicit test.

Now we compile:

  void *func(long count) { return new int[count]; }

into:

  __Z4funcl: ## @_Z4funcl
    movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
    mulq %rcx ## encoding: [0x48,0xf7,0xe1]
    seto %cl ## encoding: [0x0f,0x90,0xc1]
    testb %cl, %cl ## encoding: [0x84,0xc9]
    movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
    cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
    jmp __Znam ## TAILCALL

instead of:

  __Z4funcl: ## @_Z4funcl
    movl $4, %ecx ## encoding: [0xb9,0x04,0x00,0x00,0x00]
    movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
    mulq %rcx ## encoding: [0x48,0xf7,0xe1]
    testq %rdx, %rdx ## encoding: [0x48,0x85,0xd2]
    movq $-1, %rdi ## encoding: [0x48,0xc7,0xc7,0xff,0xff,0xff,0xff]
    cmoveq %rax, %rdi ## encoding: [0x48,0x0f,0x44,0xf8]
    jmp __Znam ## TAILCALL

Other than the silly seto+test, this is using the o bit directly, so it's going in the right direction.

llvm-svn: 120935
This commit is contained in:
parent
183ddd8ed3
commit
364bb0a081
|
@ -1600,7 +1600,32 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
return RetVal;
|
||||
break;
|
||||
}
|
||||
|
||||
case X86ISD::UMUL: {
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
|
||||
unsigned LoReg, HiReg;
|
||||
switch (NVT.getSimpleVT().SimpleTy) {
|
||||
default: llvm_unreachable("Unsupported VT!");
|
||||
case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; Opc = X86::MUL8r; break;
|
||||
case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; Opc = X86::MUL16r; break;
|
||||
case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; Opc = X86::MUL32r; break;
|
||||
case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; Opc = X86::MUL64r; break;
|
||||
}
|
||||
|
||||
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
|
||||
N0, SDValue()).getValue(1);
|
||||
|
||||
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
|
||||
SDValue Ops[] = {N1, InFlag};
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops, 2);
|
||||
|
||||
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
|
||||
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
|
||||
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
case ISD::SMUL_LOHI:
|
||||
case ISD::UMUL_LOHI: {
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
|
@ -1653,11 +1678,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
|
||||
array_lengthof(Ops));
|
||||
InFlag = SDValue(CNode, 1);
|
||||
|
||||
// Update the chain.
|
||||
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
|
||||
} else {
|
||||
InFlag =
|
||||
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag);
|
||||
InFlag = SDValue(CNode, 0);
|
||||
}
|
||||
|
||||
// Prevent use of AH in a REX instruction by referencing AX instead.
|
||||
|
@ -1696,7 +1722,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
ReplaceUses(SDValue(Node, 1), Result);
|
||||
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
|
||||
}
|
||||
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
|
@ -948,6 +948,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
|
||||
setOperationAction(ISD::USUBO, MVT::i32, Custom);
|
||||
setOperationAction(ISD::SMULO, MVT::i32, Custom);
|
||||
setOperationAction(ISD::UMULO, MVT::i32, Custom);
|
||||
|
||||
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
|
||||
// handle type legalization for these operations here.
|
||||
|
@ -961,6 +962,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
|
||||
setOperationAction(ISD::USUBO, MVT::i64, Custom);
|
||||
setOperationAction(ISD::SMULO, MVT::i64, Custom);
|
||||
setOperationAction(ISD::UMULO, MVT::i64, Custom);
|
||||
}
|
||||
|
||||
if (!Subtarget->is64Bit()) {
|
||||
|
@ -7042,7 +7044,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
|
|||
return NewSetCC;
|
||||
}
|
||||
|
||||
// Look for "(setcc) == / != 1" to avoid unncessary setcc.
|
||||
// Look for "(setcc) == / != 1" to avoid unnecessary setcc.
|
||||
if (Op0.getOpcode() == X86ISD::SETCC &&
|
||||
Op1.getOpcode() == ISD::Constant &&
|
||||
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
|
||||
|
@ -8446,8 +8448,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
|
|||
SDValue RHS = N->getOperand(1);
|
||||
unsigned BaseOp = 0;
|
||||
unsigned Cond = 0;
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
switch (Op.getOpcode()) {
|
||||
default: llvm_unreachable("Unknown ovf instruction!");
|
||||
case ISD::SADDO:
|
||||
|
@ -8486,19 +8487,29 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
|
|||
BaseOp = X86ISD::SMUL;
|
||||
Cond = X86::COND_O;
|
||||
break;
|
||||
case ISD::UMULO:
|
||||
BaseOp = X86ISD::UMUL;
|
||||
Cond = X86::COND_B;
|
||||
break;
|
||||
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
|
||||
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
|
||||
MVT::i32);
|
||||
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
|
||||
|
||||
SDValue SetCC =
|
||||
DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
|
||||
DAG.getConstant(X86::COND_O, MVT::i32),
|
||||
SDValue(Sum.getNode(), 2));
|
||||
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
|
||||
return Sum;
|
||||
}
|
||||
}
|
||||
|
||||
// Also sets EFLAGS.
|
||||
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
|
||||
SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);
|
||||
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
|
||||
|
||||
SDValue SetCC =
|
||||
DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
|
||||
DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));
|
||||
DAG.getNode(X86ISD::SETCC, DL, N->getValueType(1),
|
||||
DAG.getConstant(Cond, MVT::i32),
|
||||
SDValue(Sum.getNode(), 1));
|
||||
|
||||
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
|
||||
return Sum;
|
||||
|
|
|
@ -200,9 +200,11 @@ namespace llvm {
|
|||
PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
|
||||
PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
|
||||
|
||||
// ADD, SUB, SMUL, UMUL, etc. - Arithmetic operations with FLAGS results.
|
||||
ADD, SUB, SMUL, UMUL,
|
||||
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
|
||||
ADD, SUB, SMUL,
|
||||
INC, DEC, OR, XOR, AND,
|
||||
|
||||
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
|
||||
|
||||
// MUL_IMM - X86 specific multiply by immediate.
|
||||
MUL_IMM,
|
||||
|
|
|
@ -60,11 +60,12 @@ def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
|
|||
|
||||
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
|
||||
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
|
||||
"mul{l}\t$src",
|
||||
[]>; // EAX,EDX = EAX*GR32
|
||||
"mul{l}\t$src", // EAX,EDX = EAX*GR32
|
||||
[/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
|
||||
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
|
||||
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
|
||||
"mul{q}\t$src", []>; // RAX,RDX = RAX*GR64
|
||||
"mul{q}\t$src", // RAX,RDX = RAX*GR64
|
||||
[/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
|
||||
|
||||
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
|
||||
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
|
||||
|
|
|
@ -35,6 +35,12 @@ def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
|
|||
[SDTCisSameAs<0, 2>,
|
||||
SDTCisSameAs<0, 3>,
|
||||
SDTCisInt<0>, SDTCisVT<1, i32>]>;
|
||||
// RES1, RES2, FLAGS = op LHS, RHS
|
||||
def SDT2ResultBinaryArithWithFlags : SDTypeProfile<3, 2,
|
||||
[SDTCisSameAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>,
|
||||
SDTCisSameAs<0, 3>,
|
||||
SDTCisInt<0>, SDTCisVT<1, i32>]>;
|
||||
def SDTX86BrCond : SDTypeProfile<0, 3,
|
||||
[SDTCisVT<0, OtherVT>,
|
||||
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
|
||||
|
@ -188,7 +194,7 @@ def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags,
|
|||
def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>;
|
||||
def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
|
||||
[SDNPCommutative]>;
|
||||
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
|
||||
def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
|
||||
[SDNPCommutative]>;
|
||||
|
||||
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
|
||||
|
|
|
@ -1,8 +1,14 @@
|
|||
; RUN: llc < %s -march=x86 | grep "\\\\\\\<mul"
|
||||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
|
||||
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
|
||||
define i1 @a(i32 %x) zeroext nounwind {
|
||||
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
|
||||
%obil = extractvalue {i32, i1} %res, 1
|
||||
ret i1 %obil
|
||||
|
||||
; CHECK: a:
|
||||
; CHECK: mull
|
||||
; CHECK: seto %al
|
||||
; CHECK: movzbl %al, %eax
|
||||
; CHECK: ret
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue