[WebAssembly] Optimize memcpy/memmove/memcpy calls.

These calls return their first argument, but because LLVM uses an intrinsic
with a void return type, they can't use the returned attribute. Generalize
the store results pass to optimize these calls too.

llvm-svn: 258781
This commit is contained in:
Dan Gohman 2016-01-26 04:01:11 +00:00
parent be6f196bff
commit bdf08d5da6
5 changed files with 189 additions and 43 deletions

View File

@ -78,11 +78,6 @@ stores.
//===---------------------------------------------------------------------===//
Memset/memcpy/memmove should be marked with the "returned" attribute somehow,
even when they are translated through intrinsics.
//===---------------------------------------------------------------------===//
Consider implementing optimizeSelect, optimizeCompareInstr, optimizeCondBranch,
optimizeLoadInstr, and/or getMachineCombinerPatterns.

View File

@ -15,7 +15,10 @@
#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-peephole"
@ -28,6 +31,7 @@ class WebAssemblyPeephole final : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -44,11 +48,36 @@ FunctionPass *llvm::createWebAssemblyPeephole() {
return new WebAssemblyPeephole();
}
bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
/// If desirable, rewrite NewReg to a discard register.
static bool MaybeRewriteToDiscard(unsigned OldReg, unsigned NewReg,
MachineOperand &MO,
WebAssemblyFunctionInfo &MFI,
MachineRegisterInfo &MRI) {
bool Changed = false;
// TODO: Handle SP/physregs
if (OldReg == NewReg && TargetRegisterInfo::isVirtualRegister(NewReg)) {
Changed = true;
unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
MO.setReg(NewReg);
MO.setIsDead();
MFI.stackifyVReg(NewReg);
MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
}
return Changed;
}
bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Store Results **********\n"
<< "********** Function: " << MF.getName() << '\n';
});
MachineRegisterInfo &MRI = MF.getRegInfo();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
const WebAssemblyTargetLowering &TLI =
*MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
for (auto &MBB : MF)
for (auto &MI : MBB)
@ -69,17 +98,33 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
// can use $discard instead.
MachineOperand &MO = MI.getOperand(0);
unsigned OldReg = MO.getReg();
// TODO: Handle SP/physregs
if (OldReg ==
MI.getOperand(WebAssembly::StoreValueOperandNo).getReg() &&
TargetRegisterInfo::isVirtualRegister(
MI.getOperand(WebAssembly::StoreValueOperandNo).getReg())) {
Changed = true;
unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
MO.setReg(NewReg);
MO.setIsDead();
MFI.stackifyVReg(NewReg);
MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
unsigned NewReg =
MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
Changed |= MaybeRewriteToDiscard(OldReg, NewReg, MO, MFI, MRI);
break;
}
case WebAssembly::CALL_I32:
case WebAssembly::CALL_I64: {
MachineOperand &Op1 = MI.getOperand(1);
if (Op1.isSymbol()) {
StringRef Name(Op1.getSymbolName());
if (Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
Name == TLI.getLibcallName(RTLIB::MEMSET)) {
LibFunc::Func Func;
if (LibInfo.getLibFunc(Name, Func)) {
if (!MI.getOperand(2).isReg())
report_fatal_error(
"Call to builtin function with wrong signature");
MachineOperand &MO = MI.getOperand(0);
unsigned OldReg = MO.getReg();
unsigned NewReg = MI.getOperand(2).getReg();
if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg))
report_fatal_error(
"Call to builtin function with wrong signature");
Changed |= MaybeRewriteToDiscard(OldReg, NewReg, MO, MFI, MRI);
}
}
}
}
}

View File

@ -17,12 +17,18 @@
/// potentially also exposing the store to register stackifying. These both can
/// reduce get_local/set_local traffic.
///
/// This pass also performs this optimization for memcpy, memmove, and memset
/// calls, since the LLVM intrinsics for these return void so they can't use the
/// returned attribute and consequently aren't handled by the OptimizeReturned
/// pass.
///
//===----------------------------------------------------------------------===//
#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@ -49,6 +55,7 @@ public:
AU.addPreserved<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@ -63,6 +70,40 @@ FunctionPass *llvm::createWebAssemblyStoreResults() {
return new WebAssemblyStoreResults();
}
// Replace uses of FromReg with ToReg if they are dominated by MI.
static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
unsigned FromReg, unsigned ToReg,
const MachineRegisterInfo &MRI,
MachineDominatorTree &MDT) {
bool Changed = false;
for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
MachineOperand &O = *I++;
MachineInstr *Where = O.getParent();
if (Where->getOpcode() == TargetOpcode::PHI) {
// PHIs use their operands on their incoming CFG edges rather than
// in their parent blocks. Get the basic block paired with this use
// of FromReg and check that MI's block dominates it.
MachineBasicBlock *Pred =
Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
if (!MDT.dominates(&MBB, Pred))
continue;
} else {
// For a non-PHI, check that MI dominates the instruction in the
// normal way.
if (&MI == Where || !MDT.dominates(&MI, Where))
continue;
}
Changed = true;
DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from "
<< MI << "\n");
O.setReg(ToReg);
// If the store's def was previously dead, it is no longer. But the
// dead flag shouldn't be set yet.
assert(!MI.getOperand(0).isDead() && "Unexpected dead flag");
}
return Changed;
}
bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Store Results **********\n"
@ -71,6 +112,9 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
const WebAssemblyTargetLowering &TLI =
*MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering();
auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
assert(MRI.isSSA() && "StoreResults depends on SSA form");
@ -89,36 +133,38 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::STORE_F32:
case WebAssembly::STORE_F64:
case WebAssembly::STORE_I32:
case WebAssembly::STORE_I64:
case WebAssembly::STORE_I64: {
unsigned ToReg = MI.getOperand(0).getReg();
unsigned FromReg =
MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
MachineOperand &O = *I++;
MachineInstr *Where = O.getParent();
if (Where->getOpcode() == TargetOpcode::PHI) {
// PHIs use their operands on their incoming CFG edges rather than
// in their parent blocks. Get the basic block paired with this use
// of FromReg and check that MI's block dominates it.
MachineBasicBlock *Pred =
Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
if (!MDT.dominates(&MBB, Pred))
continue;
} else {
// For a non-PHI, check that MI dominates the instruction in the
// normal way.
if (&MI == Where || !MDT.dominates(&MI, Where))
continue;
Changed |= ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
break;
}
case WebAssembly::CALL_I32:
case WebAssembly::CALL_I64: {
MachineOperand &Op1 = MI.getOperand(1);
if (Op1.isSymbol()) {
StringRef Name(Op1.getSymbolName());
if (Name == TLI.getLibcallName(RTLIB::MEMCPY) ||
Name == TLI.getLibcallName(RTLIB::MEMMOVE) ||
Name == TLI.getLibcallName(RTLIB::MEMSET)) {
LibFunc::Func Func;
if (LibInfo.getLibFunc(Name, Func)) {
if (!MI.getOperand(2).isReg())
report_fatal_error(
"Call to builtin function with wrong signature");
unsigned FromReg = MI.getOperand(2).getReg();
unsigned ToReg = MI.getOperand(0).getReg();
if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg))
report_fatal_error(
"Call to builtin function with wrong signature");
Changed |=
ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT);
}
}
Changed = true;
DEBUG(dbgs() << "Setting operand " << O << " in " << *Where
<< " from " << MI << "\n");
O.setReg(ToReg);
// If the store's def was previously dead, it is no longer. But the
// dead flag shouldn't be set yet.
assert(!MI.getOperand(0).isDead() && "Dead flag set on store result");
}
}
}
}
return Changed;

View File

@ -21,8 +21,8 @@ define i32 @foo() {
; CHECK-LABEL: call_memcpy:
; CHECK-NEXT: .param i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.call $discard=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $0{{$}}
; CHECK-NEXT: i32.call $push0=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false)

View File

@ -0,0 +1,60 @@
; RUN: llc < %s -asm-verbose=false | FileCheck %s
; Test memcpy, memmove, and memset intrinsics.
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
; Test that return values are optimized.
; CHECK-LABEL: copy_yes:
; CHECK: i32.call $push0=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8* @copy_yes(i8* %dst, i8* %src, i32 %len) {
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
ret i8* %dst
}
; CHECK-LABEL: copy_no:
; CHECK: i32.call $discard=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return{{$}}
define void @copy_no(i8* %dst, i8* %src, i32 %len) {
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
ret void
}
; CHECK-LABEL: move_yes:
; CHECK: i32.call $push0=, memmove@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8* @move_yes(i8* %dst, i8* %src, i32 %len) {
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
ret i8* %dst
}
; CHECK-LABEL: move_no:
; CHECK: i32.call $discard=, memmove@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return{{$}}
define void @move_no(i8* %dst, i8* %src, i32 %len) {
call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)
ret void
}
; CHECK-LABEL: set_yes:
; CHECK: i32.call $push0=, memset@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8* @set_yes(i8* %dst, i8 %src, i32 %len) {
call void @llvm.memset.p0i8.i32(i8* %dst, i8 %src, i32 %len, i32 1, i1 false)
ret i8* %dst
}
; CHECK-LABEL: set_no:
; CHECK: i32.call $discard=, memset@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return{{$}}
define void @set_no(i8* %dst, i8 %src, i32 %len) {
call void @llvm.memset.p0i8.i32(i8* %dst, i8 %src, i32 %len, i32 1, i1 false)
ret void
}