forked from OSchip/llvm-project
[X86] Improve mul w/ overflow codegen, to MUL8+SETO.
Currently, @llvm.smul.with.overflow.i8 expands to 9 instructions, where only 3 are really needed.

This adds X86ISD::UMUL8/SMUL8 SD nodes, and custom-lowers them to MUL8/IMUL8 + SETO.

i8 is a special case because there are no two- or three-operand variants of (I)MUL8, so the first operand and the return value need to go in AL/AX.

Also, we can't write patterns for these instructions: TableGen refuses patterns where output operands don't match SDNode results. That is the case here, because the output operand of these instructions is an implicitly defined register.

A related special case (and FIXME) exists for MUL8 (X86InstrArith.td):

    // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
    // This probably ought to be moved to a def : Pat<> if the
    // syntax can be accepted.
    [(set AL, (mul AL, GR8:$src)), (implicit EFLAGS)]

Ideally, these go away with UMUL8, but we still need to improve TableGen support of implicit operands in patterns.

Before this change:

    movsbl %sil, %eax
    movsbl %dil, %ecx
    imull %eax, %ecx
    movb %cl, %al
    sarb $7, %al
    movzbl %al, %eax
    movzbl %ch, %esi
    cmpl %eax, %esi
    setne %al

After:

    movb %dil, %al
    imulb %sil
    seto %al

Also, remove a now-redundant testcase for PR19858, and enable more FastISel ALU-overflow tests for SelectionDAG too.

Differential Revision: http://reviews.llvm.org/D5809

llvm-svn: 220516
This commit is contained in:
parent
ff4181adec
commit
5175bcf43a
|
@ -2218,6 +2218,25 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
|
||||
getI8Imm(ShlVal));
|
||||
}
|
||||
case X86ISD::UMUL8:
|
||||
case X86ISD::SMUL8: {
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
|
||||
Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
|
||||
|
||||
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
|
||||
N0, SDValue()).getValue(1);
|
||||
|
||||
SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
|
||||
SDValue Ops[] = {N1, InFlag};
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
|
||||
|
||||
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
|
||||
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
case X86ISD::UMUL: {
|
||||
SDValue N0 = Node->getOperand(0);
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
|
|
|
@ -1597,9 +1597,6 @@ void X86TargetLowering::resetOperationActions() {
|
|||
setOperationAction(ISD::UMULO, VT, Custom);
|
||||
}
|
||||
|
||||
// There are no 8-bit 3-address imul/mul instructions
|
||||
setOperationAction(ISD::SMULO, MVT::i8, Expand);
|
||||
setOperationAction(ISD::UMULO, MVT::i8, Expand);
|
||||
|
||||
if (!Subtarget->is64Bit()) {
|
||||
// These libcalls are not available in 32-bit.
|
||||
|
@ -18190,10 +18187,15 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
|
|||
Cond = X86::COND_B;
|
||||
break;
|
||||
case ISD::SMULO:
|
||||
BaseOp = X86ISD::SMUL;
|
||||
BaseOp = N->getValueType(0) == MVT::i8 ? X86ISD::SMUL8 : X86ISD::SMUL;
|
||||
Cond = X86::COND_O;
|
||||
break;
|
||||
case ISD::UMULO: { // i64, i8 = umulo lhs, rhs --> i64, i64, i32 umul lhs,rhs
|
||||
if (N->getValueType(0) == MVT::i8) {
|
||||
BaseOp = X86ISD::UMUL8;
|
||||
Cond = X86::COND_O;
|
||||
break;
|
||||
}
|
||||
SDVTList VTs = DAG.getVTList(N->getValueType(0), N->getValueType(0),
|
||||
MVT::i32);
|
||||
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
|
||||
|
|
|
@ -301,6 +301,9 @@ namespace llvm {
|
|||
|
||||
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
|
||||
|
||||
// 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS
|
||||
SMUL8, UMUL8,
|
||||
|
||||
// MUL_IMM - X86 specific multiply by immediate.
|
||||
MUL_IMM,
|
||||
|
||||
|
|
|
@ -1,24 +0,0 @@
|
|||
; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s
|
||||
; PR19858
|
||||
|
||||
declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
|
||||
define i8 @testumulo(i32 %argc) {
|
||||
; CHECK: imull
|
||||
; CHECK: testb %{{.+}}, %{{.+}}
|
||||
; CHECK: je [[NOOVERFLOWLABEL:.+]]
|
||||
; CHECK: {{.*}}[[NOOVERFLOWLABEL]]:
|
||||
; CHECK-NEXT: movb
|
||||
; CHECK-NEXT: retl
|
||||
top:
|
||||
%RHS = trunc i32 %argc to i8
|
||||
%umul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 25, i8 %RHS)
|
||||
%ex = extractvalue { i8, i1 } %umul, 1
|
||||
br i1 %ex, label %overflow, label %nooverlow
|
||||
|
||||
overflow:
|
||||
ret i8 %RHS
|
||||
|
||||
nooverlow:
|
||||
%umul.value = extractvalue { i8, i1 } %umul, 0
|
||||
ret i8 %umul.value
|
||||
}
|
|
@ -123,12 +123,9 @@ entry:
|
|||
; Check boundary conditions for large immediates.
|
||||
define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) {
|
||||
entry:
|
||||
; SDAG-LABEL: saddo.i64imm2
|
||||
; SDAG: addq $-2147483648, %rdi
|
||||
; SDAG-NEXT: seto %al
|
||||
; FAST-LABEL: saddo.i64imm2
|
||||
; FAST: addq $-2147483648, %rdi
|
||||
; FAST-NEXT: seto %al
|
||||
; CHECK-LABEL: saddo.i64imm2
|
||||
; CHECK: addq $-2147483648, %rdi
|
||||
; CHECK-NEXT: seto %al
|
||||
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -2147483648)
|
||||
%val = extractvalue {i64, i1} %t, 0
|
||||
%obit = extractvalue {i64, i1} %t, 1
|
||||
|
@ -297,10 +294,10 @@ entry:
|
|||
; SMULO
|
||||
define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
|
||||
entry:
|
||||
; FAST-LABEL: smulo.i8
|
||||
; FAST: movb %dil, %al
|
||||
; FAST-NEXT: imulb %sil
|
||||
; FAST-NEXT: seto %cl
|
||||
; CHECK-LABEL: smulo.i8
|
||||
; CHECK: movb %dil, %al
|
||||
; CHECK-NEXT: imulb %sil
|
||||
; CHECK-NEXT: seto %cl
|
||||
%t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
|
||||
%val = extractvalue {i8, i1} %t, 0
|
||||
%obit = extractvalue {i8, i1} %t, 1
|
||||
|
@ -347,10 +344,10 @@ entry:
|
|||
; UMULO
|
||||
define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
|
||||
entry:
|
||||
; FAST-LABEL: umulo.i8
|
||||
; FAST: movb %dil, %al
|
||||
; FAST-NEXT: mulb %sil
|
||||
; FAST-NEXT: seto %cl
|
||||
; CHECK-LABEL: umulo.i8
|
||||
; CHECK: movb %dil, %al
|
||||
; CHECK-NEXT: mulb %sil
|
||||
; CHECK-NEXT: seto %cl
|
||||
%t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
|
||||
%val = extractvalue {i8, i1} %t, 0
|
||||
%obit = extractvalue {i8, i1} %t, 1
|
||||
|
|
Loading…
Reference in New Issue