forked from OSchip/llvm-project
[X86] Add load folding support to the custom isel we do for X86ISD::UMUL/SMUL.
The peephole pass isn't always able to fold the load because it can't commute the implicit usage of AL/AX/EAX/RAX. llvm-svn: 350272
This commit is contained in:
parent
ce46bfa848
commit
df5304d8de
|
@ -3454,31 +3454,73 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
|
|||
SDValue N0 = Node->getOperand(0);
|
||||
SDValue N1 = Node->getOperand(1);
|
||||
|
||||
unsigned LoReg, Opc;
|
||||
unsigned LoReg, ROpc, MOpc;
|
||||
switch (NVT.SimpleTy) {
|
||||
default: llvm_unreachable("Unsupported VT!");
|
||||
case MVT::i8:
|
||||
LoReg = X86::AL;
|
||||
Opc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
|
||||
ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
|
||||
MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
|
||||
break;
|
||||
case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
|
||||
case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
|
||||
case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
|
||||
case MVT::i16:
|
||||
LoReg = X86::AX;
|
||||
ROpc = X86::MUL16r;
|
||||
MOpc = X86::MUL16m;
|
||||
break;
|
||||
case MVT::i32:
|
||||
LoReg = X86::EAX;
|
||||
ROpc = X86::MUL32r;
|
||||
MOpc = X86::MUL32m;
|
||||
break;
|
||||
case MVT::i64:
|
||||
LoReg = X86::RAX;
|
||||
ROpc = X86::MUL64r;
|
||||
MOpc = X86::MUL64m;
|
||||
break;
|
||||
}
|
||||
|
||||
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
|
||||
bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
|
||||
// Multiply is commutative.
|
||||
if (!FoldedLoad) {
|
||||
FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
|
||||
if (FoldedLoad)
|
||||
std::swap(N0, N1);
|
||||
}
|
||||
|
||||
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
|
||||
N0, SDValue()).getValue(1);
|
||||
|
||||
// i16/i32/i64 use an instruction that produces a low and high result even
|
||||
// though only the low result is used.
|
||||
SDVTList VTs;
|
||||
if (NVT == MVT::i8)
|
||||
VTs = CurDAG->getVTList(NVT, MVT::i32);
|
||||
else
|
||||
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
|
||||
MachineSDNode *CNode;
|
||||
if (FoldedLoad) {
|
||||
// i16/i32/i64 use an instruction that produces a low and high result even
|
||||
// though only the low result is used.
|
||||
SDVTList VTs;
|
||||
if (NVT == MVT::i8)
|
||||
VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
|
||||
else
|
||||
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
|
||||
|
||||
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
|
||||
InFlag };
|
||||
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
|
||||
|
||||
// Update the chain.
|
||||
ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
|
||||
// Record the mem-refs.
|
||||
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
|
||||
} else {
|
||||
// i16/i32/i64 use an instruction that produces a low and high result even
|
||||
// though only the low result is used.
|
||||
SDVTList VTs;
|
||||
if (NVT == MVT::i8)
|
||||
VTs = CurDAG->getVTList(NVT, MVT::i32);
|
||||
else
|
||||
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
|
||||
|
||||
CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
|
||||
}
|
||||
|
||||
SDValue Ops[] = {N1, InFlag};
|
||||
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
|
||||
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
|
||||
ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
|
||||
CurDAG->RemoveDeadNode(Node);
|
||||
|
|
|
@ -7,9 +7,8 @@ declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
|
|||
define zeroext i1 @a(i32 %x) nounwind {
|
||||
; X86-LABEL: a:
|
||||
; X86: # %bb.0:
|
||||
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X86-NEXT: movl $3, %ecx
|
||||
; X86-NEXT: mull %ecx
|
||||
; X86-NEXT: movl $3, %eax
|
||||
; X86-NEXT: mull {{[0-9]+}}(%esp)
|
||||
; X86-NEXT: seto %al
|
||||
; X86-NEXT: retl
|
||||
;
|
||||
|
|
|
@ -725,8 +725,9 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
|
|||
define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
|
||||
; SDAG-LABEL: smuloi8_load:
|
||||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movb (%rdi), %al
|
||||
; SDAG-NEXT: imulb %sil
|
||||
; SDAG-NEXT: movl %esi, %eax
|
||||
; SDAG-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; SDAG-NEXT: imulb (%rdi)
|
||||
; SDAG-NEXT: seto %cl
|
||||
; SDAG-NEXT: movb %al, (%rdx)
|
||||
; SDAG-NEXT: movl %ecx, %eax
|
||||
|
@ -753,9 +754,8 @@ define zeroext i1 @smuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
|
|||
; SDAG-LABEL: smuloi8_load2:
|
||||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movl %edi, %eax
|
||||
; SDAG-NEXT: movb (%rsi), %cl
|
||||
; SDAG-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; SDAG-NEXT: imulb %cl
|
||||
; SDAG-NEXT: imulb (%rsi)
|
||||
; SDAG-NEXT: seto %cl
|
||||
; SDAG-NEXT: movb %al, (%rdx)
|
||||
; SDAG-NEXT: movl %ecx, %eax
|
||||
|
@ -926,8 +926,9 @@ define zeroext i1 @smuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
|
|||
define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
|
||||
; SDAG-LABEL: umuloi8_load:
|
||||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movb (%rdi), %al
|
||||
; SDAG-NEXT: mulb %sil
|
||||
; SDAG-NEXT: movl %esi, %eax
|
||||
; SDAG-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; SDAG-NEXT: mulb (%rdi)
|
||||
; SDAG-NEXT: seto %cl
|
||||
; SDAG-NEXT: movb %al, (%rdx)
|
||||
; SDAG-NEXT: movl %ecx, %eax
|
||||
|
@ -954,9 +955,8 @@ define zeroext i1 @umuloi8_load2(i8 %v1, i8* %ptr2, i8* %res) {
|
|||
; SDAG-LABEL: umuloi8_load2:
|
||||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movl %edi, %eax
|
||||
; SDAG-NEXT: movb (%rsi), %cl
|
||||
; SDAG-NEXT: ## kill: def $al killed $al killed $eax
|
||||
; SDAG-NEXT: mulb %cl
|
||||
; SDAG-NEXT: mulb (%rsi)
|
||||
; SDAG-NEXT: seto %cl
|
||||
; SDAG-NEXT: movb %al, (%rdx)
|
||||
; SDAG-NEXT: movl %ecx, %eax
|
||||
|
@ -984,8 +984,9 @@ define zeroext i1 @umuloi16_load(i16* %ptr1, i16 %v2, i16* %res) {
|
|||
; SDAG-LABEL: umuloi16_load:
|
||||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movq %rdx, %rcx
|
||||
; SDAG-NEXT: movzwl (%rdi), %eax
|
||||
; SDAG-NEXT: mulw %si
|
||||
; SDAG-NEXT: movl %esi, %eax
|
||||
; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
|
||||
; SDAG-NEXT: mulw (%rdi)
|
||||
; SDAG-NEXT: seto %dl
|
||||
; SDAG-NEXT: movw %ax, (%rcx)
|
||||
; SDAG-NEXT: movl %edx, %eax
|
||||
|
@ -1014,9 +1015,8 @@ define zeroext i1 @umuloi16_load2(i16 %v1, i16* %ptr2, i16* %res) {
|
|||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movq %rdx, %rcx
|
||||
; SDAG-NEXT: movl %edi, %eax
|
||||
; SDAG-NEXT: movzwl (%rsi), %edx
|
||||
; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
|
||||
; SDAG-NEXT: mulw %dx
|
||||
; SDAG-NEXT: mulw (%rsi)
|
||||
; SDAG-NEXT: seto %dl
|
||||
; SDAG-NEXT: movw %ax, (%rcx)
|
||||
; SDAG-NEXT: movl %edx, %eax
|
||||
|
@ -1045,8 +1045,8 @@ define zeroext i1 @umuloi32_load(i32* %ptr1, i32 %v2, i32* %res) {
|
|||
; SDAG-LABEL: umuloi32_load:
|
||||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movq %rdx, %rcx
|
||||
; SDAG-NEXT: movl (%rdi), %eax
|
||||
; SDAG-NEXT: mull %esi
|
||||
; SDAG-NEXT: movl %esi, %eax
|
||||
; SDAG-NEXT: mull (%rdi)
|
||||
; SDAG-NEXT: seto %dl
|
||||
; SDAG-NEXT: movl %eax, (%rcx)
|
||||
; SDAG-NEXT: movl %edx, %eax
|
||||
|
@ -1075,8 +1075,7 @@ define zeroext i1 @umuloi32_load2(i32 %v1, i32* %ptr2, i32* %res) {
|
|||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movq %rdx, %rcx
|
||||
; SDAG-NEXT: movl %edi, %eax
|
||||
; SDAG-NEXT: movl (%rsi), %edx
|
||||
; SDAG-NEXT: mull %edx
|
||||
; SDAG-NEXT: mull (%rsi)
|
||||
; SDAG-NEXT: seto %dl
|
||||
; SDAG-NEXT: movl %eax, (%rcx)
|
||||
; SDAG-NEXT: movl %edx, %eax
|
||||
|
@ -1104,8 +1103,8 @@ define zeroext i1 @umuloi64_load(i64* %ptr1, i64 %v2, i64* %res) {
|
|||
; SDAG-LABEL: umuloi64_load:
|
||||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movq %rdx, %rcx
|
||||
; SDAG-NEXT: movq (%rdi), %rax
|
||||
; SDAG-NEXT: mulq %rsi
|
||||
; SDAG-NEXT: movq %rsi, %rax
|
||||
; SDAG-NEXT: mulq (%rdi)
|
||||
; SDAG-NEXT: seto %dl
|
||||
; SDAG-NEXT: movq %rax, (%rcx)
|
||||
; SDAG-NEXT: movl %edx, %eax
|
||||
|
@ -1134,8 +1133,7 @@ define zeroext i1 @umuloi64_load2(i64 %v1, i64* %ptr2, i64* %res) {
|
|||
; SDAG: ## %bb.0:
|
||||
; SDAG-NEXT: movq %rdx, %rcx
|
||||
; SDAG-NEXT: movq %rdi, %rax
|
||||
; SDAG-NEXT: movq (%rsi), %rdx
|
||||
; SDAG-NEXT: mulq %rdx
|
||||
; SDAG-NEXT: mulq (%rsi)
|
||||
; SDAG-NEXT: seto %dl
|
||||
; SDAG-NEXT: movq %rax, (%rcx)
|
||||
; SDAG-NEXT: movl %edx, %eax
|
||||
|
|
Loading…
Reference in New Issue