diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 41a854362c92..0e2a46533532 100644
--- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 
@@ -246,5 +247,12 @@ bool llvm::bypassSlowDivision(
     MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
   }
 
+  // The code above eagerly creates divs and rems in pairs (cached in DivCache)
+  // so that divrem machine instructions can be formed efficiently.  Erase any
+  // half of a pair that ended up unused so no dead code is left behind.
+  for (auto &KV : DivCache)
+    for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
+      RecursivelyDeleteTriviallyDeadInstructions(Phi);
+
   return MadeChange;
 }
diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll
new file mode 100644
index 000000000000..4846d52f4d26
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; Only the div result is used -- the paired rem (both the i32 and the i64 one) should be DCE'ed.
+; CHECK-LABEL: @div_only
+define void @div_only(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK: udiv i32
+  ; CHECK-NOT: urem
+  ; CHECK: sdiv i64
+  ; CHECK-NOT: rem
+  %d = sdiv i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}
+
+; Only the rem result is used -- the paired div (both the i32 and the i64 one) should be DCE'ed.
+; CHECK-LABEL: @rem_only
+define void @rem_only(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK-NOT: div
+  ; CHECK: urem i32
+  ; CHECK-NOT: div
+  ; CHECK: rem i64
+  ; CHECK-NOT: div
+  %d = srem i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}