From ae1f265bf19858ada9c9b6761fba7f8e1f9e395a Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Wed, 16 Dec 2015 01:01:30 +0000
Subject: [PATCH] [EarlyCSE] DSE of stores which write back loaded values

Extend EarlyCSE with an additional style of dead store elimination. If
we write back a value just read from that memory location, we can
eliminate the store under the assumption that the value hasn't changed.

I'm implementing this mostly because I noticed the omission when
looking at the code. It seemed strange to have InstCombine have a
peephole which was more powerful than EarlyCSE. :)

Differential Revision: http://reviews.llvm.org/D15397

llvm-svn: 255739
---
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 27 +++++++++
 llvm/test/Transforms/EarlyCSE/basic.ll  | 74 +++++++++++++++++++++++++
 2 files changed, 101 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 6fa194e57092..03f8c05c9d32 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -687,6 +687,33 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
       continue;
     }
 
+    // write back DSE - If we write back the same value we just loaded from
+    // the same location and haven't passed any intervening writes or ordering
+    // operations, we can remove the write. The primary benefit is in allowing
+    // the available load table to remain valid and value forward past where
+    // the store originally was.
+    if (MemInst.isValid() && MemInst.isStore()) {
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.Data &&
+          InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
+          InVal.Generation == CurrentGeneration &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing stores with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered()) {
+        assert((!LastStore ||
+                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+                    MemInst.getPointerOperand()) &&
+               "can't have an intervening store!");
+        DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumDSE;
+        // We can avoid incrementing the generation count since we were able
+        // to eliminate this store.
+        continue;
+      }
+    }
+
     // Okay, this isn't something we can CSE at all. Check to see if it is
     // something that could modify memory. If so, our available memory values
     // cannot be used so bump the generation count.
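(Illustration only, not part of the patch: with the change above
applied, a minimal writeback pattern like the following should have its
store deleted, e.g. under "opt -basicaa -early-cse -S", since the
available-load entry for %p still has the current generation and the
stored value matches the loaded one. The function name is made up for
the example.)

  define void @writeback(i32* %p) {
    %v = load i32, i32* %p    ; %v becomes the available value at %p
    store i32 %v, i32* %p     ; writes back the observed value -> removed
    ret void
  }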
diff --git a/llvm/test/Transforms/EarlyCSE/basic.ll b/llvm/test/Transforms/EarlyCSE/basic.ll
index 43b5e6098f6a..8c9b74b4d0e1 100644
--- a/llvm/test/Transforms/EarlyCSE/basic.ll
+++ b/llvm/test/Transforms/EarlyCSE/basic.ll
@@ -203,3 +203,77 @@ define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
 ; CHECK: load i32, i32* %P1
 ; CHECK: load i32, i32* %P1
 }
+
+define void @dse1(i32 *%P) {
+; CHECK-LABEL: @dse1
+; CHECK-NOT: store
+  %v = load i32, i32* %P
+  store i32 %v, i32* %P
+  ret void
+}
+
+define void @dse2(i32 *%P) {
+; CHECK-LABEL: @dse2
+; CHECK-NOT: store
+  %v = load atomic i32, i32* %P seq_cst, align 4
+  store i32 %v, i32* %P
+  ret void
+}
+
+define void @dse3(i32 *%P) {
+; CHECK-LABEL: @dse3
+; CHECK-NOT: store
+  %v = load atomic i32, i32* %P seq_cst, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  ret void
+}
+
+define i32 @dse4(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse4
+; CHECK-NOT: store
+; CHECK: ret i32 0
+  %a = load i32, i32* %Q
+  %v = load atomic i32, i32* %P unordered, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  %b = load i32, i32* %Q
+  %res = sub i32 %a, %b
+  ret i32 %res
+}
+
+; Note that in this example, %P and %Q could in fact be the same
+; pointer.  %v could be different than the value observed for %a
+; and that's okay because we're using relaxed memory ordering.
+; The only guarantee we have to provide is that each of the loads
+; has to observe some value written to that location.  We do
+; not have to respect the order in which those writes were done.
+define i32 @dse5(i32 *%P, i32 *%Q) {
+; CHECK-LABEL: @dse5
+; CHECK-NOT: store
+; CHECK: ret i32 0
+  %v = load atomic i32, i32* %P unordered, align 4
+  %a = load atomic i32, i32* %Q unordered, align 4
+  store atomic i32 %v, i32* %P unordered, align 4
+  %b = load atomic i32, i32* %Q unordered, align 4
+  %res = sub i32 %a, %b
+  ret i32 %res
+}
+
+
+define void @dse_neg1(i32 *%P) {
+; CHECK-LABEL: @dse_neg1
+; CHECK: store
+  %v = load i32, i32* %P
+  store i32 5, i32* %P
+  ret void
+}
+
+; Could remove the store, but only if ordering was somehow
+; encoded.
+define void @dse_neg2(i32 *%P) {
+; CHECK-LABEL: @dse_neg2
+; CHECK: store
+  %v = load i32, i32* %P
+  store atomic i32 %v, i32* %P seq_cst, align 4
+  ret void
+}
+
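(A contrasting sketch, hypothetical and not among the tests above: any
intervening instruction that may write to the same location bumps
EarlyCSE's generation counter, so the InVal.Generation ==
CurrentGeneration check in the new code fails and the writeback store
must be kept.)

  define void @writeback_clobbered(i32* %P, i32* %Q) {
    %v = load i32, i32* %P
    store i32 7, i32* %Q     ; may alias %P, so the available load is stale
    store i32 %v, i32* %P    ; no longer provably redundant; kept
    ret void
  }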