forked from OSchip/llvm-project
Revert "Enhance peephole optimization."
This reverts commit d84ca05ef7. Will revert, update the commit message, and re-commit.
This commit is contained in:
parent
035aee725c
commit
1555c41abb
|
@ -25,16 +25,13 @@
|
|||
#include "llvm/CodeGen/MachineConstantPool.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/StackMaps.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/InstrTypes.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
|
@ -967,101 +964,6 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
|
|||
return ShAmt < 4 && ShAmt > 0;
|
||||
}
|
||||
|
||||
/// Try to prove that \p CmpInstr (a TEST64rr) is made redundant by an earlier
/// flag-setting AND instruction.
///
/// \p CmpValDefInstr must be the SUBREG_TO_REG that defines the register
/// tested by \p CmpInstr (the caller guarantees this). On success, \p AndInstr
/// is set to the AND that feeds the SUBREG_TO_REG, \p NoSignFlag is set to
/// true to poison SF (see the comment below), and \p ClearsOverflowFlag is
/// set to true (AND always clears OF).
///
/// \returns true if the TEST64rr is a candidate for removal.
static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
                                   MachineInstr &CmpValDefInstr,
                                   const MachineRegisterInfo *MRI,
                                   MachineInstr **AndInstr,
                                   const TargetRegisterInfo *TRI,
                                   bool &NoSignFlag, bool &ClearsOverflowFlag) {
  if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
    return false;

  if (CmpInstr.getOpcode() != X86::TEST64rr)
    return false;

  // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
  // guarantees that it's analyzable only if two registers are identical.
  assert(
      (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
      "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
      "requires two reg operands are the same.");

  // Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
  // `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case
  // if `CmpValDefInstr` sets the EFLAGS, it is likely that `CmpInstr` is
  // redundant.
  assert(
      (MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
      "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG.");

  // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is
  // typically 0.
  if (CmpValDefInstr.getOperand(1).getImm() != 0)
    return false;

  // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
  // sub_32bit or sub_xmm.
  if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
    return false;

  MachineInstr *VregDefInstr =
      MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());

  assert(VregDefInstr && "Must have a definition (SSA)");

  // Requires `CmpValDefInstr` and `VregDefInstr` are from the same MBB
  // to simplify the subsequent analysis.
  //
  // FIXME: If `VregDefInstr->getParent()` is the only predecessor of
  // `CmpValDefInstr.getParent()`, this could be handled.
  if (VregDefInstr->getParent() != CmpValDefInstr.getParent())
    return false;

  if (X86::isAND(VregDefInstr->getOpcode())) {
    // Get a sequence of instructions like
    //   %reg = and* ...                    // Set EFLAGS
    //   ...                                // EFLAGS not changed
    //   %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
    //   test64rr %extended_reg, %extended_reg, implicit-def $eflags
    //
    // If subsequent readers use a subset of bits that don't change
    // after `and*` instructions, it's likely that the test64rr could
    // be optimized away.
    for (const MachineInstr &Instr :
         make_range(std::next(MachineBasicBlock::iterator(VregDefInstr)),
                    MachineBasicBlock::iterator(CmpValDefInstr))) {
      // There are instructions between 'VregDefInstr' and
      // 'CmpValDefInstr' that modify EFLAGS.
      if (Instr.modifiesRegister(X86::EFLAGS, TRI))
        return false;
    }

    *AndInstr = VregDefInstr;

    // AND instruction will essentially update SF and clear OF, so
    // NoSignFlag should be false in the sense that SF is modified by `AND`.
    //
    // However, the implementation artificially sets `NoSignFlag` to true
    // to poison the SF bit; that is to say, if SF is looked at later, the
    // optimization (to erase TEST64rr) will be disabled.
    //
    // The reason to poison the SF bit is that the SF bit value could be
    // different in the `AND` and `TEST` operation; the sign bit is not known
    // for `AND`, and is known to be 0 as a result of `TEST64rr`.
    //
    // FIXME: As opposed to poisoning the SF bit directly, consider peeking
    // into the AND instruction and using the static information to guide
    // peephole optimization if possible. For example, it's possible to fold
    // a conditional move into a copy if the relevant EFLAG bits could be
    // deduced from an immediate operand of the and operation.
    NoSignFlag = true;
    // ClearsOverflowFlag is true for AND operation (no surprise).
    ClearsOverflowFlag = true;
    return true;
  }
  return false;
}
|
||||
|
||||
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
|
||||
unsigned Opc, bool AllowSP, Register &NewSrc,
|
||||
bool &isKill, MachineOperand &ImplicitOp,
|
||||
|
@ -4324,23 +4226,6 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
|
|||
MI = &Inst;
|
||||
break;
|
||||
}
|
||||
|
||||
// Look back for the following pattern, in which case the test64rr
|
||||
// instruction could be erased.
|
||||
//
|
||||
// Example:
|
||||
// %reg = and32ri %in_reg, 5
|
||||
// ... // EFLAGS not changed.
|
||||
// %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
|
||||
// test64rr %src_reg, %src_reg, implicit-def $eflags
|
||||
MachineInstr *AndInstr = nullptr;
|
||||
if (IsCmpZero &&
|
||||
findRedundantFlagInstr(CmpInstr, Inst, MRI, &AndInstr, TRI,
|
||||
NoSignFlag, ClearsOverflowFlag)) {
|
||||
assert(AndInstr != nullptr && X86::isAND(AndInstr->getOpcode()));
|
||||
MI = AndInstr;
|
||||
break;
|
||||
}
|
||||
// Cannot find other candidates before definition of SrcReg.
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -1,196 +0,0 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - %s -mtriple=x86_64-unknown-linux-gnu --run-pass=peephole-opt | FileCheck %s

# Test that TEST64rr is erased in `test_erased`, and kept in `test_not_erased_when_sf_used`
# and `test_not_erased_when_eflags_change`.

--- |
  ; ModuleID = 'tmp.ll'
  source_filename = "tmp.ll"
  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

  define i64 @test_erased(ptr %0, i64 %1, i64 %2) {
    %4 = load i64, ptr %0, align 8
    %5 = and i64 %4, 3
    %6 = icmp eq i64 %5, 0
    %7 = select i1 %6, i64 %1, i64 %5
    store i64 %7, ptr %0, align 8
    ret i64 %5
  }

  define i64 @test_not_erased_when_sf_used(ptr %0, i64 %1, i64 %2, i64 %3) {
    %5 = load i64, ptr %0, align 8
    %6 = and i64 %5, 3
    %7 = icmp slt i64 %6, 0
    %8 = select i1 %7, i64 %1, i64 %6
    store i64 %8, ptr %0, align 8
    ret i64 %5
  }

  define void @test_not_erased_when_eflags_change(ptr %0, i64 %1, i64 %2, i64 %3, ptr %4) {
    %6 = load i64, ptr %0, align 8
    %7 = and i64 %6, 3
    %8 = xor i64 %3, 5
    %9 = icmp eq i64 %7, 0
    %10 = select i1 %9, i64 %1, i64 %7
    store i64 %10, ptr %0, align 8
    store i64 %8, ptr %4, align 8
    ret void
  }

...
---
name:            test_erased
alignment:       16
tracksDebugUserValues: false
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: gr64, preferred-register: '' }
  - { id: 2, class: gr64, preferred-register: '' }
  - { id: 3, class: gr64, preferred-register: '' }
  - { id: 4, class: gr32, preferred-register: '' }
  - { id: 5, class: gr32, preferred-register: '' }
  - { id: 6, class: gr64, preferred-register: '' }
  - { id: 7, class: gr64, preferred-register: '' }
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo: {}
body:             |
  bb.0 (%ir-block.3):
    liveins: $rdi, $rsi

    ; CHECK-LABEL: name: test_erased
    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
    ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit
    ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY2]], 3, implicit-def $eflags
    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit
    ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY]], 4, implicit $eflags
    ; CHECK-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0)
    ; CHECK-NEXT: $rax = COPY [[SUBREG_TO_REG]]
    ; CHECK-NEXT: RET 0, $rax
    %1:gr64 = COPY $rsi
    %0:gr64 = COPY $rdi
    %3:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    %4:gr32 = COPY %3.sub_32bit
    %5:gr32 = AND32ri8 %4, 3, implicit-def dead $eflags
    %6:gr64 = SUBREG_TO_REG 0, killed %5, %subreg.sub_32bit
    TEST64rr %6, %6, implicit-def $eflags
    %7:gr64 = CMOV64rr %6, %1, 4, implicit $eflags
    MOV64mr %0, 1, $noreg, 0, $noreg, killed %7 :: (store (s64) into %ir.0)
    $rax = COPY %6
    RET 0, $rax

...
---
name:            test_not_erased_when_sf_used
alignment:       16
tracksDebugUserValues: false
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: gr64, preferred-register: '' }
  - { id: 2, class: gr64, preferred-register: '' }
  - { id: 3, class: gr64, preferred-register: '' }
  - { id: 4, class: gr64, preferred-register: '' }
  - { id: 5, class: gr32, preferred-register: '' }
  - { id: 6, class: gr32, preferred-register: '' }
  - { id: 7, class: gr64, preferred-register: '' }
  - { id: 8, class: gr64, preferred-register: '' }
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo: {}
body:             |
  bb.0 (%ir-block.4):
    liveins: $rdi, $rsi

    ; CHECK-LABEL: name: test_not_erased_when_sf_used
    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
    ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit
    ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY2]], 3, implicit-def dead $eflags
    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit
    ; CHECK-NEXT: TEST64rr [[SUBREG_TO_REG]], [[SUBREG_TO_REG]], implicit-def $eflags
    ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY]], 8, implicit $eflags
    ; CHECK-NEXT: MOV64mr [[COPY1]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0)
    ; CHECK-NEXT: $rax = COPY [[MOV64rm]]
    ; CHECK-NEXT: RET 0, $rax
    %1:gr64 = COPY $rsi
    %0:gr64 = COPY $rdi
    %4:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    %5:gr32 = COPY %4.sub_32bit
    %6:gr32 = AND32ri8 %5, 3, implicit-def dead $eflags
    %7:gr64 = SUBREG_TO_REG 0, killed %6, %subreg.sub_32bit
    TEST64rr %7, %7, implicit-def $eflags
    %8:gr64 = CMOV64rr %7, %1, 8, implicit $eflags
    MOV64mr %0, 1, $noreg, 0, $noreg, killed %8 :: (store (s64) into %ir.0)
    $rax = COPY %4
    RET 0, $rax

...
---
name:            test_not_erased_when_eflags_change
alignment:       16
tracksDebugUserValues: false
registers:
  - { id: 0, class: gr64, preferred-register: '' }
  - { id: 1, class: gr64, preferred-register: '' }
  - { id: 2, class: gr64, preferred-register: '' }
  - { id: 3, class: gr64, preferred-register: '' }
  - { id: 4, class: gr64, preferred-register: '' }
  - { id: 5, class: gr64, preferred-register: '' }
  - { id: 6, class: gr32, preferred-register: '' }
  - { id: 7, class: gr32, preferred-register: '' }
  - { id: 8, class: gr64, preferred-register: '' }
  - { id: 9, class: gr64, preferred-register: '' }
  - { id: 10, class: gr64, preferred-register: '' }
liveins:
  - { reg: '$rdi', virtual-reg: '%0' }
  - { reg: '$rsi', virtual-reg: '%1' }
  - { reg: '$rcx', virtual-reg: '%3' }
  - { reg: '$r8', virtual-reg: '%4' }
frameInfo:
  maxAlignment:    1
machineFunctionInfo: {}
body:             |
  bb.0 (%ir-block.5):
    liveins: $rdi, $rsi, $rcx, $r8

    ; CHECK-LABEL: name: test_not_erased_when_eflags_change
    ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $r8
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rcx
    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY $rsi
    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rdi
    ; CHECK-NEXT: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm [[COPY3]], 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[MOV64rm]].sub_32bit
    ; CHECK-NEXT: [[AND32ri8_:%[0-9]+]]:gr32 = AND32ri8 [[COPY4]], 3, implicit-def dead $eflags
    ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64 = SUBREG_TO_REG 0, killed [[AND32ri8_]], %subreg.sub_32bit
    ; CHECK-NEXT: [[XOR64ri8_:%[0-9]+]]:gr64 = XOR64ri8 [[COPY1]], 5, implicit-def dead $eflags
    ; CHECK-NEXT: TEST64rr [[SUBREG_TO_REG]], [[SUBREG_TO_REG]], implicit-def $eflags
    ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[SUBREG_TO_REG]], [[COPY2]], 4, implicit $eflags
    ; CHECK-NEXT: MOV64mr [[COPY3]], 1, $noreg, 0, $noreg, killed [[CMOV64rr]] :: (store (s64) into %ir.0)
    ; CHECK-NEXT: MOV64mr [[COPY]], 1, $noreg, 0, $noreg, killed [[XOR64ri8_]] :: (store (s64) into %ir.4)
    ; CHECK-NEXT: RET 0
    %4:gr64 = COPY $r8
    %3:gr64 = COPY $rcx
    %1:gr64 = COPY $rsi
    %0:gr64 = COPY $rdi
    %5:gr64 = MOV64rm %0, 1, $noreg, 0, $noreg :: (load (s64) from %ir.0)
    %6:gr32 = COPY %5.sub_32bit
    %7:gr32 = AND32ri8 %6, 3, implicit-def dead $eflags
    %8:gr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32bit
    %9:gr64 = XOR64ri8 %3, 5, implicit-def dead $eflags
    TEST64rr %8, %8, implicit-def $eflags
    %10:gr64 = CMOV64rr %8, %1, 4, implicit $eflags
    MOV64mr %0, 1, $noreg, 0, $noreg, killed %10 :: (store (s64) into %ir.0)
    MOV64mr %4, 1, $noreg, 0, $noreg, killed %9 :: (store (s64) into %ir.4)
    RET 0

...
|
Loading…
Reference in New Issue