forked from OSchip/llvm-project

[WebAssembly] Optimize memcpy/memmove/memset calls.

These calls return their first argument, but because LLVM uses an intrinsic with a void return type, they can't use the `returned` attribute. Generalize the store results pass to optimize these calls too.

llvm-svn: 258781
parent be6f196bff
commit bdf08d5da6
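For context, here is a minimal LLVM IR sketch of the pattern this change targets (the libc-style memcpy prototype shown is illustrative, not part of the patch): a regular declaration can mark its first parameter `returned`, but the void-returning intrinsic cannot, so code that returns the destination pointer right after the intrinsic call only benefits if a pass such as this one rewrites the call's result register.

; A libc-style prototype can state "returns its first argument" directly:
declare i8* @memcpy(i8* returned, i8* nocapture readonly, i32)

; The intrinsic LLVM emits instead returns void, so the attribute can't apply:
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)

; The pattern the generalized pass optimizes: the destination pointer is
; returned immediately after the intrinsic call.
define i8* @copy(i8* %dst, i8* %src, i32 %len) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
  ret i8* %dst
}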
@@ -78,11 +78,6 @@ stores.

//===---------------------------------------------------------------------===//

Memset/memcpy/memmove should be marked with the "returned" attribute somehow,
even when they are translated through intrinsics.

//===---------------------------------------------------------------------===//

Consider implementing optimizeSelect, optimizeCompareInstr, optimizeCondBranch,
optimizeLoadInstr, and/or getMachineCombinerPatterns.

@@ -15,7 +15,10 @@
#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-peephole"

@@ -28,6 +31,7 @@ class WebAssemblyPeephole final : public MachineFunctionPass {

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

@@ -44,11 +48,36 @@ FunctionPass *llvm::createWebAssemblyPeephole() {
  return new WebAssemblyPeephole();
}

bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
/// If desirable, rewrite NewReg to a discard register.
static bool MaybeRewriteToDiscard(unsigned OldReg, unsigned NewReg,
                                  MachineOperand &MO,
                                  WebAssemblyFunctionInfo &MFI,
                                  MachineRegisterInfo &MRI) {
  bool Changed = false;
  // TODO: Handle SP/physregs
  if (OldReg == NewReg && TargetRegisterInfo::isVirtualRegister(NewReg)) {
    Changed = true;
    unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
    MO.setReg(NewReg);
    MO.setIsDead();
    MFI.stackifyVReg(NewReg);
    MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
  }
  return Changed;
}

bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
  DEBUG({
    dbgs() << "********** Store Results **********\n"
           << "********** Function: " << MF.getName() << '\n';
  });

  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const WebAssemblyTargetLowering &TLI =
      *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
  auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  bool Changed = false;

  for (auto &MBB : MF)
    for (auto &MI : MBB)
@@ -69,17 +98,33 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
        // can use $discard instead.
        MachineOperand &MO = MI.getOperand(0);
        unsigned OldReg = MO.getReg();
        // TODO: Handle SP/physregs
        if (OldReg ==
                MI.getOperand(WebAssembly::StoreValueOperandNo).getReg() &&
            TargetRegisterInfo::isVirtualRegister(
                MI.getOperand(WebAssembly::StoreValueOperandNo).getReg())) {
          Changed = true;
          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
          MO.setReg(NewReg);
          MO.setIsDead();
          MFI.stackifyVReg(NewReg);
          MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
        unsigned NewReg =
            MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
        Changed |= MaybeRewriteToDiscard(OldReg, NewReg, MO, MFI, MRI);
        break;
      }
      case WebAssembly::CALL_I32:
      case WebAssembly::CALL_I64: {
        MachineOperand &Op1 = MI.getOperand(1);
        if (Op1.isSymbol()) {
          StringRef Name(Op1.getSymbolName());
          if (Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
              Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
              Name == TLI.getLibcallName(RTLIB::MEMSET)) {
            LibFunc::Func Func;
            if (LibInfo.getLibFunc(Name, Func)) {
              if (!MI.getOperand(2).isReg())
                report_fatal_error(
                    "Call to builtin function with wrong signature");
              MachineOperand &MO = MI.getOperand(0);
              unsigned OldReg = MO.getReg();
              unsigned NewReg = MI.getOperand(2).getReg();
              if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg))
                report_fatal_error(
                    "Call to builtin function with wrong signature");
              Changed |= MaybeRewriteToDiscard(OldReg, NewReg, MO, MFI, MRI);
            }
          }
        }
      }
      }

@@ -17,12 +17,18 @@
/// potentially also exposing the store to register stackifying. These both can
/// reduce get_local/set_local traffic.
///
/// This pass also performs this optimization for memcpy, memmove, and memset
/// calls, since the LLVM intrinsics for these return void so they can't use the
/// returned attribute and consequently aren't handled by the OptimizeReturned
/// pass.
///
//===----------------------------------------------------------------------===//

#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

@@ -49,6 +55,7 @@ public:
    AU.addPreserved<MachineBlockFrequencyInfo>();
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

@@ -63,6 +70,40 @@ FunctionPass *llvm::createWebAssemblyStoreResults() {
  return new WebAssemblyStoreResults();
}

// Replace uses of FromReg with ToReg if they are dominated by MI.
static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
                                 unsigned FromReg, unsigned ToReg,
                                 const MachineRegisterInfo &MRI,
                                 MachineDominatorTree &MDT) {
  bool Changed = false;
  for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
    MachineOperand &O = *I++;
    MachineInstr *Where = O.getParent();
    if (Where->getOpcode() == TargetOpcode::PHI) {
      // PHIs use their operands on their incoming CFG edges rather than
      // in their parent blocks. Get the basic block paired with this use
      // of FromReg and check that MI's block dominates it.
      MachineBasicBlock *Pred =
          Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
      if (!MDT.dominates(&MBB, Pred))
        continue;
    } else {
      // For a non-PHI, check that MI dominates the instruction in the
      // normal way.
      if (&MI == Where || !MDT.dominates(&MI, Where))
        continue;
    }
    Changed = true;
    DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from "
                 << MI << "\n");
    O.setReg(ToReg);
    // If the store's def was previously dead, it is no longer. But the
    // dead flag shouldn't be set yet.
    assert(!MI.getOperand(0).isDead() && "Unexpected dead flag");
  }
  return Changed;
}

bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
  DEBUG({
    dbgs() << "********** Store Results **********\n"
@@ -71,6 +112,9 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {

  const MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  const WebAssemblyTargetLowering &TLI =
      *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
  auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
  bool Changed = false;

  assert(MRI.isSSA() && "StoreResults depends on SSA form");

@@ -89,36 +133,38 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
      case WebAssembly::STORE_F32:
      case WebAssembly::STORE_F64:
      case WebAssembly::STORE_I32:
      case WebAssembly::STORE_I64:
      case WebAssembly::STORE_I64: {
        unsigned ToReg = MI.getOperand(0).getReg();
        unsigned FromReg =
            MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
        for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
          MachineOperand &O = *I++;
          MachineInstr *Where = O.getParent();
          if (Where->getOpcode() == TargetOpcode::PHI) {
            // PHIs use their operands on their incoming CFG edges rather than
            // in their parent blocks. Get the basic block paired with this use
            // of FromReg and check that MI's block dominates it.
            MachineBasicBlock *Pred =
                Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
            if (!MDT.dominates(&MBB, Pred))
              continue;
          } else {
            // For a non-PHI, check that MI dominates the instruction in the
            // normal way.
            if (&MI == Where || !MDT.dominates(&MI, Where))
              continue;
        Changed |= ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
        break;
      }
      case WebAssembly::CALL_I32:
      case WebAssembly::CALL_I64: {
        MachineOperand &Op1 = MI.getOperand(1);
        if (Op1.isSymbol()) {
          StringRef Name(Op1.getSymbolName());
          if (Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
              Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
              Name == TLI.getLibcallName(RTLIB::MEMSET)) {
            LibFunc::Func Func;
            if (LibInfo.getLibFunc(Name, Func)) {
              if (!MI.getOperand(2).isReg())
                report_fatal_error(
                    "Call to builtin function with wrong signature");
              unsigned FromReg = MI.getOperand(2).getReg();
              unsigned ToReg = MI.getOperand(0).getReg();
              if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
                report_fatal_error(
                    "Call to builtin function with wrong signature");
              Changed |=
                  ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
            }
          }
          Changed = true;
          DEBUG(dbgs() << "Setting operand " << O << " in " << *Where
                       << " from " << MI << "\n");
          O.setReg(ToReg);
          // If the store's def was previously dead, it is no longer. But the
          // dead flag shouldn't be set yet.
          assert(!MI.getOperand(0).isDead() && "Dead flag set on store result");
        }
      }
      }
      }

  return Changed;
@@ -21,8 +21,8 @@ define i32 @foo() {
; CHECK-LABEL: call_memcpy:
; CHECK-NEXT: .param i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.call $discard=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK-NEXT: i32.call $push0=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)

@@ -0,0 +1,60 @@
; RUN: llc < %s -asm-verbose=false | FileCheck %s

; Test memcpy, memmove, and memset intrinsics.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)

; Test that return values are optimized.

; CHECK-LABEL: copy_yes:
; CHECK: i32.call $push0=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8* @copy_yes(i8* %dst, i8* %src, i32 %len) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
  ret i8* %dst
}

; CHECK-LABEL: copy_no:
; CHECK: i32.call $discard=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return{{$}}
define void @copy_no(i8* %dst, i8* %src, i32 %len) {
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
  ret void
}

; CHECK-LABEL: move_yes:
; CHECK: i32.call $push0=, memmove@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8* @move_yes(i8* %dst, i8* %src, i32 %len) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
  ret i8* %dst
}

; CHECK-LABEL: move_no:
; CHECK: i32.call $discard=, memmove@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return{{$}}
define void @move_no(i8* %dst, i8* %src, i32 %len) {
  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
  ret void
}

; CHECK-LABEL: set_yes:
; CHECK: i32.call $push0=, memset@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8* @set_yes(i8* %dst, i8 %src, i32 %len) {
  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %src, i32 %len, i32 1, i1 false)
  ret i8* %dst
}

; CHECK-LABEL: set_no:
; CHECK: i32.call $discard=, memset@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return{{$}}
define void @set_no(i8* %dst, i8 %src, i32 %len) {
  call void @llvm.memset.p0i8.i32(i8* %dst, i8 %src, i32 %len, i32 1, i1 false)
  ret void
}
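The returned.ll diff above already captures the codegen effect of this change; the following before/after contrast is condensed from those CHECK lines rather than taken from separate compiler output:

; Before: the call result was discarded and the incoming $0 returned separately:
;   i32.call $discard=, memcpy@FUNCTION, $0, $1, $2
;   return $0
; After: the call result is stackified and returned directly:
;   i32.call $push0=, memcpy@FUNCTION, $0, $1, $2
;   return $pop0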