[SampleFDO] New hierarchical discriminator for Flow Sensitive SampleFDO

This patch implements the first part of Flow Sensitive SampleFDO (FSAFDO).
It has the following changes:
(1) disable current discriminator encoding scheme,
(2) new hierarchical discriminator for FSAFDO.

For this patch, option "-enable-fs-discriminator=true" turns on the new
functionality. Option "-enable-fs-discriminator=false" (the default)
keeps the current SampleFDO behavior. When the fs-discriminator is
enabled, we insert a flag variable, namely, __llvm_fs_discriminator__, to
the object. This symbol will be checked by the create_llvm_prof tool, and used
to generate a profile with FS-AFDO discriminators enabled. If this
happens, for an extbinary format profile, create_llvm_prof tool
will add a flag to profile summary section.

Differential Revision: https://reviews.llvm.org/D102246
This commit is contained in:
Rong Xu 2021-05-18 16:08:38 -07:00
parent ff99fdf63f
commit 886629a8c9
16 changed files with 658 additions and 40 deletions

View File

@ -0,0 +1,74 @@
//===------- MIRFSDiscriminator.h: MIR FS Discriminator Support -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the supporting functions for adding Machine level IR
// Flow Sensitive discriminators to the instruction debug information. With
// this, a cloned machine instruction in a different MachineBasicBlock will
// have its own discriminator value. This is done in a MIRAddFSDiscriminators
// pass.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_MIRFSDISCRIMINATOR_H
#define LLVM_CODEGEN_MIRFSDISCRIMINATOR_H
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include <cassert>
namespace llvm {
/// Machine function pass that adds flow sensitive (FS) discriminator bits to
/// the debug locations of machine instructions.
class MIRAddFSDiscriminators : public MachineFunctionPass {
  /// Machine function associated with this pass. Starts out null so that
  /// getMachineFunction() never returns an indeterminate pointer.
  MachineFunction *MF = nullptr;
  /// Lower bound of the FS bit range handed to the constructor.
  unsigned LowBit;
  /// Upper bound of the FS bit range handed to the constructor.
  unsigned HighBit;

public:
  static char ID;

  /// FS bits that will be used in this pass (numbers are 0 based and
  /// inclusive).
  ///
  /// NOTE(review): the default arguments (0, 0) do not satisfy the assertion
  /// in the constructor body, so default construction trips it in builds with
  /// assertions enabled -- confirm whether the defaults are meant to be usable.
  MIRAddFSDiscriminators(unsigned LowBit = 0, unsigned HighBit = 0)
      : MachineFunctionPass(ID), LowBit(LowBit), HighBit(HighBit) {
    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
  }

  /// getNumFSBBs() - Return the number of machine BBs that have FS samples.
  unsigned getNumFSBBs();

  /// getNumFSSamples() - Return the number of samples that have flow sensitive
  /// values.
  uint64_t getNumFSSamples();

  /// getMachineFunction - Return the current machine function (null if none
  /// has been stored in the pass).
  const MachineFunction *getMachineFunction() const { return MF; }

private:
  bool runOnMachineFunction(MachineFunction &) override;
};
} // namespace llvm
#endif // LLVM_CODEGEN_MIRFSDISCRIMINATOR_H

View File

@ -165,6 +165,9 @@ namespace llvm {
/// This pass perform post-ra machine sink for COPY instructions.
extern char &PostRAMachineSinkingID;
/// This pass adds flow sensitive discriminators.
extern char &MIRAddFSDiscriminatorsID;
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
@ -487,6 +490,10 @@ namespace llvm {
/// Create IR Type Promotion pass. \see TypePromotion.cpp
FunctionPass *createTypePromotionPass();
/// Add Flow Sensitive Discriminators.
FunctionPass *createMIRAddFSDiscriminatorsPass(unsigned LowBit,
unsigned HighBit);
/// Creates MIR Debugify pass. \see MachineDebugify.cpp
ModulePass *createDebugifyMachineModulePass();

View File

@ -26,6 +26,8 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Discriminator.h"
#include <cassert>
#include <climits>
#include <cstddef>
@ -60,6 +62,8 @@
namespace llvm {
extern cl::opt<bool> EnableFSDiscriminator;
class DITypeRefArray {
const MDTuple *N = nullptr;
@ -1576,31 +1580,6 @@ class DILocation : public MDNode {
ShouldCreate);
}
/// With a given unsigned int \p U, use up to 13 bits to represent it.
/// old_bit 1~5 --> new_bit 1~5
/// old_bit 6~12 --> new_bit 7~13
/// new_bit_6 is 0 if higher bits (7~13) are all 0
static unsigned getPrefixEncodingFromUnsigned(unsigned U) {
U &= 0xfff;
return U > 0x1f ? (((U & 0xfe0) << 1) | (U & 0x1f) | 0x20) : U;
}
/// Reverse transformation as getPrefixEncodingFromUnsigned.
static unsigned getUnsignedFromPrefixEncoding(unsigned U) {
if (U & 1)
return 0;
U >>= 1;
return (U & 0x20) ? (((U >> 1) & 0xfe0) | (U & 0x1f)) : (U & 0x1f);
}
/// Returns the next component stored in discriminator.
static unsigned getNextComponentInDiscriminator(unsigned D) {
if ((D & 1) == 0)
return D >> ((D & 0x40) ? 14 : 7);
else
return D >> 1;
}
TempDILocation cloneImpl() const {
// Get the raw scope/inlinedAt since it is possible to invoke this on
// a DILocation containing temporary metadata.
@ -1608,14 +1587,6 @@ class DILocation : public MDNode {
getRawInlinedAt(), isImplicitCode());
}
static unsigned encodeComponent(unsigned C) {
return (C == 0) ? 1U : (getPrefixEncodingFromUnsigned(C) << 1);
}
static unsigned encodingBits(unsigned C) {
return (C == 0) ? 1 : (C > 0x1f ? 14 : 7);
}
public:
// Disallow replacing operands.
void replaceOperandWith(unsigned I, Metadata *New) = delete;
@ -1762,8 +1733,20 @@ public:
static
const DILocation *getMergedLocations(ArrayRef<const DILocation *> Locs);
/// Keep only bits [0 .. B] of the discriminator value \p D, clearing every
/// bit above position \p B (\p B is a 0-based bit index).
/// Example: an input of (0x1FF, 7) returns 0xFF.
static unsigned getMaskedDiscriminator(unsigned D, unsigned B) {
  const unsigned Mask = getN1Bits(B);
  return D & Mask;
}
/// Return the bit position (BASE_DIS_BIT_END) that bounds the base
/// discriminator; it is passed to getMaskedDiscriminator() as the 0-based
/// index of the highest bit to keep.
static unsigned getBaseDiscriminatorBits() { return BASE_DIS_BIT_END; }
/// Extract the base discriminator from the encoded discriminator \p D.
/// Without FS discriminators the value is decoded from the prefix encoding;
/// with them enabled it is the masked low bits of \p D.
static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) {
  if (!EnableFSDiscriminator)
    return getUnsignedFromPrefixEncoding(D);
  const unsigned BaseBits = getBaseDiscriminatorBits();
  return getMaskedDiscriminator(D, BaseBits);
}
@ -1785,6 +1768,8 @@ public:
/// Returns the duplication factor for a given encoded discriminator \p D, or
/// 1 if no value or 0 is encoded.
static unsigned getDuplicationFactorFromDiscriminator(unsigned D) {
if (EnableFSDiscriminator)
return 1;
D = getNextComponentInDiscriminator(D);
unsigned Ret = getUnsignedFromPrefixEncoding(D);
if (Ret == 0)
@ -2226,6 +2211,14 @@ unsigned DILocation::getCopyIdentifier() const {
Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
unsigned BD, DF, CI;
if (EnableFSDiscriminator) {
BD = getBaseDiscriminator();
if (D == BD)
return this;
return cloneWithDiscriminator(D);
}
decodeDiscriminator(getDiscriminator(), BD, DF, CI);
if (D == BD)
return this;
@ -2235,6 +2228,8 @@ Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D)
}
Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
assert(!EnableFSDiscriminator && "FSDiscriminator should not call this.");
DF *= getDuplicationFactor();
if (DF <= 1)
return this;

View File

@ -275,6 +275,7 @@ void initializeLowerSwitchLegacyPassPass(PassRegistry &);
void initializeLowerTypeTestsPass(PassRegistry&);
void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &);
void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &);
void initializeMIRAddFSDiscriminatorsPass(PassRegistry &);
void initializeMIRCanonicalizerPass(PassRegistry &);
void initializeMIRNamerPass(PassRegistry &);
void initializeMIRPrintingPassPass(PassRegistry&);

View File

@ -171,6 +171,9 @@ struct Config {
bool ShouldDiscardValueNames = true;
DiagnosticHandlerFunction DiagHandler;
/// Add FSAFDO discriminators.
bool AddFSDiscriminator = false;
/// If this field is set, LTO will write input file paths and symbol
/// resolutions here in llvm-lto2 command line flag format. This can be
/// used for testing and for running the LTO pipeline outside of the linker

View File

@ -0,0 +1,73 @@
//===---- llvm/Support/Discriminator.h -- Discriminator Utils ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the constants and utility functions for discriminators.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_DISCRIMINATOR_H
#define LLVM_SUPPORT_DISCRIMINATOR_H
// Utility functions for encoding / decoding discriminators.
/// Prefix-encode the low 12 bits of \p U into at most 13 bits.
/// Inputs that fit in 5 bits are returned unchanged. Otherwise the low 5 bits
/// stay in place, the marker bit 0x20 is set, and the remaining 7 input bits
/// are moved up by one position so they sit above the marker.
static inline unsigned getPrefixEncodingFromUnsigned(unsigned U) {
  const unsigned Value = U & 0xfff;
  const unsigned High7 = Value & 0xfe0;
  // No bits above the low five: the value is its own encoding.
  if (High7 == 0)
    return Value;
  const unsigned Low5 = Value & 0x1f;
  return (High7 << 1) | Low5 | 0x20;
}
/// Decode a discriminator component produced with the prefix encoding.
/// A component whose lowest bit is set decodes to 0. Otherwise the lowest bit
/// is dropped, the low 5 bits are taken as-is, and, when the marker bit 0x20
/// is present, the bits above it are shifted back down by one.
static inline unsigned getUnsignedFromPrefixEncoding(unsigned U) {
  if ((U & 1) != 0)
    return 0;
  const unsigned Payload = U >> 1;
  unsigned Decoded = Payload & 0x1f;
  if ((Payload & 0x20) != 0)
    Decoded |= (Payload >> 1) & 0xfe0;
  return Decoded;
}
/// Strip the first encoded component from \p D and return what follows it.
/// The first component occupies 1 bit when the lowest bit of \p D is set;
/// otherwise it occupies 14 bits when bit 0x40 is set and 7 bits when it is
/// clear.
static inline unsigned getNextComponentInDiscriminator(unsigned D) {
  unsigned Width = 1;
  if ((D & 1) == 0)
    Width = (D & 0x40) != 0 ? 14 : 7;
  return D >> Width;
}
/// Encode \p C as a discriminator component. Zero is represented by the
/// single bit 1; any other value is prefix-encoded and shifted left by one so
/// that its lowest bit is 0.
static inline unsigned encodeComponent(unsigned C) {
  if (C == 0)
    return 1U;
  const unsigned Encoded = getPrefixEncodingFromUnsigned(C);
  return Encoded << 1;
}
/// Width in bits associated with component \p C: 1 for zero, 7 for values up
/// to 0x1f, and 14 for anything larger.
static inline unsigned encodingBits(unsigned C) {
  if (C == 0)
    return 1;
  if (C <= 0x1f)
    return 7;
  return 14;
}
// Some constants used in FS Discriminators.
#define BASE_DIS_BIT_BEG 0
#define BASE_DIS_BIT_END 7
#define PASS_1_DIS_BIT_BEG 8
#define PASS_1_DIS_BIT_END 13
#define PASS_2_DIS_BIT_BEG 14
#define PASS_2_DIS_BIT_END 19
#define PASS_3_DIS_BIT_BEG 20
#define PASS_3_DIS_BIT_END 25
#define PASS_LAST_DIS_BIT_BEG 26
#define PASS_LAST_DIS_BIT_END 31
/// Return a mask with bits [0 .. N] (0-based, inclusive) set to 1. Used in FS
/// Discriminators.
///
/// \param N index of the highest bit to set. Values of 31 and above yield the
///          all-ones 32-bit mask; negative values yield 0.
static inline unsigned getN1Bits(int N) {
  // Nothing to set; also keeps the shift count below non-negative.
  if (N < 0)
    return 0;
  if (N >= 31)
    return 0xFFFFFFFF;
  // Shift an unsigned 1: with a signed 1, N == 30 would compute
  // (1 << 31) - 1, which overflows int.
  return (1U << (N + 1)) - 1;
}
#endif /* LLVM_SUPPORT_DISCRIMINATOR_H */

View File

@ -106,6 +106,7 @@ add_llvm_component_library(LLVMCodeGen
MachineStripDebug.cpp
MachineTraceMetrics.cpp
MachineVerifier.cpp
MIRFSDiscriminator.cpp
MIRYamlMapping.cpp
ModuloSchedule.cpp
MultiHazardRecognizer.cpp

View File

@ -0,0 +1,139 @@
//===-------- MIRFSDiscriminator.cpp: Flow Sensitive Discriminator --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides the implementation of a machine pass that adds the flow
// sensitive discriminator to the instruction debug information.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIRFSDiscriminator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <unordered_map>
using namespace llvm;
#define DEBUG_TYPE "mirfs-discriminators"
char MIRAddFSDiscriminators::ID = 0;
INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE,
"Add MIR Flow Sensitive Discriminators",
/* cfg = */ false, /* is_analysis = */ false)
char &llvm::MIRAddFSDiscriminatorsID = MIRAddFSDiscriminators::ID;
/// Create a MIRAddFSDiscriminators pass configured with the discriminator bit
/// range \p LowBit .. \p HighBit.
FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(unsigned LowBit,
                                                     unsigned HighBit) {
  auto *Pass = new MIRAddFSDiscriminators(LowBit, HighBit);
  return Pass;
}
// Compute a hash value using debug line number, and the line numbers from the
// inline stack.
static uint64_t getCallStackHash(const MachineBasicBlock &BB,
const MachineInstr &MI,
const DILocation *DIL) {
uint64_t Ret = MD5Hash(std::to_string(DIL->getLine()));
Ret ^= MD5Hash(BB.getName());
Ret ^= MD5Hash(DIL->getScope()->getSubprogram()->getLinkageName());
for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
Ret ^= MD5Hash(std::to_string(DIL->getLine()));
Ret ^= MD5Hash(DIL->getScope()->getSubprogram()->getLinkageName());
}
return Ret;
}
// Walk the blocks of MF in iteration order and assign FS discriminators. If
// two instructions have the same lineno and discriminator, but residing in
// different BBs, the latter instruction will get a new discriminator value.
// The new discriminator keeps the existing discriminator value but sets new
// bits b/w LowBit and HighBit.
// Returns true if any debug location was rewritten. Does nothing (and returns
// false) unless EnableFSDiscriminator is set.
// NOTE(review): the parameter MF shadows the class member of the same name;
// the member is not assigned in this function.
bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
if (!EnableFSDiscriminator)
return false;
bool Changed = false;
// Key identifying a source location: (filename, line, existing discriminator).
using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>;
using BBSet = DenseSet<const MachineBasicBlock *>;
// For each location key, the set of blocks in which it has been seen so far.
using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>;
// For each location key, a counter bumped when the key shows up in a new
// block other than the first one.
using LocationDiscriminatorCurrPassMap =
DenseMap<LocationDiscriminator, unsigned>;
LocationDiscriminatorBBMap LDBM;
LocationDiscriminatorCurrPassMap LDCM;
// Mask of discriminators before this pass.
// NOTE(review): getN1Bits(LowBit) sets bits [0 .. LowBit], so the mask for
// this pass computed below covers bits LowBit+1 .. HighBit, while the counter
// is shifted left by LowBit; bit LowBit of the shifted value is therefore
// cleared by the mask -- confirm this is intended.
unsigned BitMaskBefore = getN1Bits(LowBit);
// Mask of discriminators including this pass.
unsigned BitMaskNow = getN1Bits(HighBit);
// Mask of discriminators for bits specific to this pass.
unsigned BitMaskThisPass = BitMaskNow ^ BitMaskBefore;
// Number of debug locations rewritten; only reported in debug output.
unsigned NumNewD = 0;
LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: "
<< MF.getFunction().getName() << "\n");
for (MachineBasicBlock &BB : MF) {
for (MachineInstr &I : BB) {
// Instructions without a debug location or with line 0 are left alone.
const DILocation *DIL = I.getDebugLoc().get();
if (!DIL)
continue;
unsigned LineNo = DIL->getLine();
if (LineNo == 0)
continue;
unsigned Discriminator = DIL->getDiscriminator();
LocationDiscriminator LD = {DIL->getFilename(), LineNo, Discriminator};
auto &BBMap = LDBM[LD];
auto R = BBMap.insert(&BB);
// While only one block has been recorded for this key, instructions keep
// their existing discriminator.
if (BBMap.size() == 1)
continue;
unsigned DiscriminatorCurrPass;
// R.second is true when BB was newly added to the set: bump the per-key
// counter then, otherwise reuse its current value.
DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD];
DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit;
// Mix in the call stack hash (the 64-bit result is narrowed to unsigned by
// the addition), then keep only the bits that belong to this pass.
DiscriminatorCurrPass += getCallStackHash(BB, I, DIL);
DiscriminatorCurrPass &= BitMaskThisPass;
// Existing discriminator bits are preserved; the new bits are OR-ed in.
unsigned NewD = Discriminator | DiscriminatorCurrPass;
const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD);
if (!NewDIL) {
LLVM_DEBUG(dbgs() << "Could not encode discriminator: "
<< DIL->getFilename() << ":" << DIL->getLine() << ":"
<< DIL->getColumn() << ":" << Discriminator << " "
<< I << "\n");
continue;
}
I.setDebugLoc(NewDIL);
NumNewD++;
LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
<< DIL->getColumn() << ": add FS discriminator, from "
<< Discriminator << " -> " << NewD << "\n");
Changed = true;
}
}
if (Changed) {
// Add the flag symbol to the enclosing module, once per module: a constant
// i1 'true' with weak linkage.
Module *M = MF.getFunction().getParent();
const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
if (!M->getGlobalVariable(FSDiscriminatorVar)) {
auto &Context = M->getContext();
// Create a global variable to flag that FSDiscriminators are used.
new GlobalVariable(*M, Type::getInt1Ty(Context), true,
GlobalValue::WeakAnyLinkage,
ConstantInt::getTrue(Context), FSDiscriminatorVar);
}
LLVM_DEBUG(dbgs() << "Num of FS Discriminators: " << NumNewD << "\n");
}
return Changed;
}

View File

@ -39,6 +39,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Discriminator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Threading.h"
@ -165,6 +166,13 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
"Disable the abort but emit a diagnostic on failure")));
// An option that disables inserting FS-AFDO discriminators before emit.
// This is mainly for debugging and tuning purpose.
static cl::opt<bool>
FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
cl::desc("Do not insert FS-AFDO discriminators before "
"emit."));
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
@ -334,6 +342,8 @@ struct InsertedPass {
namespace llvm {
extern cl::opt<bool> EnableFSDiscriminator;
class PassConfigImpl {
public:
// List of passes explicitly substituted by this target. Normally this is
@ -1167,6 +1177,10 @@ void TargetPassConfig::addMachinePasses() {
addPass(&XRayInstrumentationID);
addPass(&PatchableFunctionID);
if (EnableFSDiscriminator && !FSNoFinalDiscrim)
addPass(createMIRAddFSDiscriminatorsPass(PASS_LAST_DIS_BIT_BEG,
PASS_LAST_DIS_BIT_END));
addPreEmitPass();
if (TM->Options.EnableIPRA)

View File

@ -23,6 +23,13 @@
using namespace llvm;
namespace llvm {
// Use FS-AFDO discriminator.
cl::opt<bool> EnableFSDiscriminator(
"enable-fs-discriminator", cl::Hidden, cl::init(false),
cl::desc("Enable adding flow sensitive discriminators"));
} // namespace llvm
const DIExpression::FragmentInfo DebugVariable::DefaultFragment = {
std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()};

View File

@ -215,10 +215,15 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
PGOOptions::SampleUse, PGOOptions::NoCSAction, true);
else if (Conf.RunCSIRInstr) {
PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
PGOOptions::IRUse, PGOOptions::CSIRInstr);
PGOOptions::IRUse, PGOOptions::CSIRInstr,
Conf.AddFSDiscriminator);
} else if (!Conf.CSIRProfile.empty()) {
PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
PGOOptions::IRUse, PGOOptions::CSIRUse);
PGOOptions::IRUse, PGOOptions::CSIRUse,
Conf.AddFSDiscriminator);
} else if (Conf.AddFSDiscriminator) {
PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
PGOOptions::NoCSAction, true);
}
LoopAnalysisManager LAM;

View File

@ -570,7 +570,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (Loop *SubLoop : *L)
LoopsToSimplify.insert(SubLoop);
if (Header->getParent()->isDebugInfoForProfiling())
// When a FSDiscriminator is enabled, we don't need to add the multiply
// factors to the discriminators.
if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))

View File

@ -349,7 +349,9 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
if (Header->getParent()->isDebugInfoForProfiling())
// When a FSDiscriminator is enabled, we don't need to add the multiply
// factors to the discriminators.
if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator)
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
if (!isa<DbgInfoIntrinsic>(&I))

View File

@ -1047,8 +1047,11 @@ static Instruction *getDebugLocFromInstOrOperands(Instruction *I) {
void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
if (const Instruction *Inst = dyn_cast_or_null<Instruction>(Ptr)) {
const DILocation *DIL = Inst->getDebugLoc();
// When a FSDiscriminator is enabled, we don't need to add the multiply
// factors to the discriminators.
if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
!isa<DbgInfoIntrinsic>(Inst)) {
!isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
assert(!VF.isScalable() && "scalable vectors not yet supported.");
auto NewDIL =
DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
@ -1058,8 +1061,7 @@ void InnerLoopVectorizer::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr)
LLVM_DEBUG(dbgs()
<< "Failed to create new discriminator: "
<< DIL->getFilename() << " Line: " << DIL->getLine());
}
else
} else
B.SetCurrentDebugLocation(DIL);
} else
B.SetCurrentDebugLocation(DebugLoc());

View File

@ -0,0 +1,60 @@
; RUN: llc -enable-fs-discriminator < %s | FileCheck %s
;
; Check that fs-afdo discriminators are generated.
; CHECK: .loc 1 7 3 is_stmt 0 discriminator 2 # foo.c:7:3
; Check: .loc 1 9 5 is_stmt 1 discriminator 2 # foo.c:9:5
; CHECK: .loc 1 9 5 is_stmt 0 discriminator 3623878658 # foo.c:9:5
; CHECK: .loc 1 7 3 is_stmt 1 discriminator 805306370 # foo.c:7:3
; Check that variable __llvm_fs_discriminator__ is generated.
; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
; CHECK: .section .rodata,"a",@progbits
; CHECK: .weak __llvm_fs_discriminator__
; CHECK: __llvm_fs_discriminator__:
; CHECK: .byte 1
; CHECK: .size __llvm_fs_discriminator__, 1
target triple = "x86_64-unknown-linux-gnu"
%struct.Node = type { %struct.Node* }
; Test input for the FS discriminator pass. Several branches in different
; basic blocks share the same debug locations: !10 (foo.c:7:3) is attached in
; entry and in both while.cond1.preheader.us* blocks, and !11 (foo.c:9:5) in
; while.cond1.preheader.lr.ph and both while.body2* blocks.
; NOTE(review): the expected discriminator values in the FileCheck lines above
; presumably depend on the basic block names used here -- confirm before
; renaming any block.
define i32 @foo(%struct.Node* readonly %node, %struct.Node* readnone %root) !dbg !6 {
entry:
%cmp = icmp eq %struct.Node* %node, %root, !dbg !8
br i1 %cmp, label %while.end4, label %while.cond1.preheader.lr.ph, !dbg !10
while.cond1.preheader.lr.ph:
%tobool = icmp eq %struct.Node* %node, null
br i1 %tobool, label %while.cond1.preheader.us.preheader, label %while.body2.preheader, !dbg !11
while.body2.preheader:
br label %while.body2, !dbg !11
while.cond1.preheader.us.preheader:
br label %while.cond1.preheader.us, !dbg !10
; Self-loop carrying the foo.c:7:3 location.
while.cond1.preheader.us:
br label %while.cond1.preheader.us, !dbg !10
; Self-loop carrying the foo.c:9:5 location.
while.body2:
br label %while.body2, !dbg !11
while.end4:
ret i32 0, !dbg !12
}
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, emissionKind: LineTablesOnly)
!1 = !DIFile(filename: "foo.c", directory: "b/")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{}
!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 5, type: !7, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
!7 = !DISubroutineType(types: !2)
!8 = !DILocation(line: 7, column: 15, scope: !9)
!9 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 2)
!10 = !DILocation(line: 7, column: 3, scope: !9)
!11 = !DILocation(line: 9, column: 5, scope: !9)
!12 = !DILocation(line: 14, column: 3, scope: !6)

View File

@ -0,0 +1,233 @@
; RUN: llc -enable-fs-discriminator < %s | FileCheck %s
;;
;; C source code for the test (compiler at -O3):
;; // A test case for loop unroll.
;;
;; __attribute__((noinline)) int bar(int i){
;; volatile int j;
;; j = i;
;; return j;
;; }
;;
;; unsigned sum;
;; __attribute__((noinline)) void work(int i){
;; if (sum % 7)
;; sum += i;
;; else
;; sum -= i;
;; }
;;
;; __attribute__((noinline)) void foo(){
;; int i, j;
;; for (j = 0; j < 48; j++)
;; for (i = 0; i < 4; i++) {
;; int ii = bar(i+j*48);
;; if (ii % 2)
;; work(ii*2);
;; if (ii % 4)
;; work(ii*3);
;; }
;; }
;;
;; int main() {
;; int i;
;; for (i = 0; i < 10000000; i++) {
;; foo();
;; }
;; }
;;
;; Check that fs-afdo discriminators are generated.
; CHECK: .loc 1 23 9 is_stmt 0 discriminator 1 # unroll.c:23:9
; CHECK: .loc 1 23 9 is_stmt 0 discriminator 1073741825 # unroll.c:23:9
; CHECK: .loc 1 23 9 is_stmt 0 discriminator 2147483649 # unroll.c:23:9
; CHECK: .loc 1 23 9 is_stmt 0 discriminator 268435457 # unroll.c:23:9
;;
;; Check that variable __llvm_fs_discriminator__ is generated.
; CHECK: .type __llvm_fs_discriminator__,@object # @__llvm_fs_discriminator__
; CHECK: .section .rodata,"a",@progbits
; CHECK: .weak __llvm_fs_discriminator__
; CHECK: __llvm_fs_discriminator__:
; CHECK: .byte 1
; CHECK: .size __llvm_fs_discriminator__, 1
target triple = "x86_64-unknown-linux-gnu"
@sum = dso_local local_unnamed_addr global i32 0, align 4
declare i32 @bar(i32 %i) #0
declare void @work(i32 %i) #2
; Test input for the FS discriminator pass. The body holds four copies of the
; same call/test/branch sequence (the unsuffixed blocks plus the .1, .2 and .3
; suffixed ones), and every copy reuses the same !dbg attachments, e.g. !37
; (unroll.c:23:9) on the first call to @work in each copy.
; NOTE(review): this appears to be the unrolled inner loop of foo() from the C
; source quoted at the top of the file -- confirm against the original build.
define dso_local void @foo() #0 !dbg !29 {
entry:
br label %for.cond1.preheader, !dbg !30
for.cond1.preheader:
%j.012 = phi i32 [ 0, %entry ], [ %inc11, %if.end9.3 ]
%mul = mul nuw nsw i32 %j.012, 48
%call = tail call i32 @bar(i32 %mul), !dbg !32
%0 = and i32 %call, 1, !dbg !33
%tobool.not = icmp eq i32 %0, 0, !dbg !33
br i1 %tobool.not, label %if.end, label %if.then, !dbg !35
if.then:
%mul4 = shl nsw i32 %call, 1, !dbg !36
tail call void @work(i32 %mul4), !dbg !37
br label %if.end, !dbg !38
if.end:
%1 = and i32 %call, 3, !dbg !39
%tobool6.not = icmp eq i32 %1, 0, !dbg !39
br i1 %tobool6.not, label %if.end9, label %if.then7, !dbg !40
if.then7:
%mul8 = mul nsw i32 %call, 3, !dbg !41
tail call void @work(i32 %mul8), !dbg !42
br label %if.end9, !dbg !43
; Second copy of the sequence (suffix .1).
if.end9:
%add.1 = or i32 %mul, 1, !dbg !44
%call.1 = tail call i32 @bar(i32 %add.1), !dbg !32
%2 = and i32 %call.1, 1, !dbg !33
%tobool.not.1 = icmp eq i32 %2, 0, !dbg !33
br i1 %tobool.not.1, label %if.end.1, label %if.then.1, !dbg !35
for.end12:
ret void, !dbg !45
if.then.1:
%mul4.1 = shl nsw i32 %call.1, 1, !dbg !36
tail call void @work(i32 %mul4.1), !dbg !37
br label %if.end.1, !dbg !38
if.end.1:
%3 = and i32 %call.1, 3, !dbg !39
%tobool6.not.1 = icmp eq i32 %3, 0, !dbg !39
br i1 %tobool6.not.1, label %if.end9.1, label %if.then7.1, !dbg !40
if.then7.1:
%mul8.1 = mul nsw i32 %call.1, 3, !dbg !41
tail call void @work(i32 %mul8.1), !dbg !42
br label %if.end9.1, !dbg !43
; Third copy of the sequence (suffix .2).
if.end9.1:
%add.2 = or i32 %mul, 2, !dbg !44
%call.2 = tail call i32 @bar(i32 %add.2), !dbg !32
%4 = and i32 %call.2, 1, !dbg !33
%tobool.not.2 = icmp eq i32 %4, 0, !dbg !33
br i1 %tobool.not.2, label %if.end.2, label %if.then.2, !dbg !35
if.then.2:
%mul4.2 = shl nsw i32 %call.2, 1, !dbg !36
tail call void @work(i32 %mul4.2), !dbg !37
br label %if.end.2, !dbg !38
if.end.2:
%5 = and i32 %call.2, 3, !dbg !39
%tobool6.not.2 = icmp eq i32 %5, 0, !dbg !39
br i1 %tobool6.not.2, label %if.end9.2, label %if.then7.2, !dbg !40
if.then7.2:
%mul8.2 = mul nsw i32 %call.2, 3, !dbg !41
tail call void @work(i32 %mul8.2), !dbg !42
br label %if.end9.2, !dbg !43
; Fourth copy of the sequence (suffix .3).
if.end9.2:
%add.3 = or i32 %mul, 3, !dbg !44
%call.3 = tail call i32 @bar(i32 %add.3), !dbg !32
%6 = and i32 %call.3, 1, !dbg !33
%tobool.not.3 = icmp eq i32 %6, 0, !dbg !33
br i1 %tobool.not.3, label %if.end.3, label %if.then.3, !dbg !35
if.then.3:
%mul4.3 = shl nsw i32 %call.3, 1, !dbg !36
tail call void @work(i32 %mul4.3), !dbg !37
br label %if.end.3, !dbg !38
if.end.3:
%7 = and i32 %call.3, 3, !dbg !39
%tobool6.not.3 = icmp eq i32 %7, 0, !dbg !39
br i1 %tobool6.not.3, label %if.end9.3, label %if.then7.3, !dbg !40
if.then7.3:
%mul8.3 = mul nsw i32 %call.3, 3, !dbg !41
tail call void @work(i32 %mul8.3), !dbg !42
br label %if.end9.3, !dbg !43
; Loop latch: increments the counter and exits after 48 iterations.
if.end9.3:
%inc11 = add nuw nsw i32 %j.012, 1, !dbg !46
%exitcond.not = icmp eq i32 %inc11, 48, !dbg !48
br i1 %exitcond.not, label %for.end12, label %for.cond1.preheader, !dbg !30, !llvm.loop !49
}
attributes #0 = { noinline nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { nofree noinline norecurse nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!1 = !DIFile(filename: "unroll.c", directory: "a/")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!7 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!8 = !DISubroutineType(types: !2)
!9 = !DILocation(line: 4, column: 3, scope: !7)
!10 = !DILocation(line: 5, column: 5, scope: !7)
!11 = !{!12, !12, i64 0}
!12 = !{!"int", !13, i64 0}
!13 = !{!"omnipotent char", !14, i64 0}
!14 = !{!"Simple C/C++ TBAA"}
!15 = !DILocation(line: 6, column: 10, scope: !7)
!16 = !DILocation(line: 7, column: 1, scope: !7)
!17 = !DILocation(line: 6, column: 3, scope: !18)
!18 = !DILexicalBlockFile(scope: !7, file: !1, discriminator: 1)
!19 = distinct !DISubprogram(name: "work", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!20 = !DILocation(line: 11, column: 7, scope: !19)
!21 = !DILocation(line: 11, column: 11, scope: !22)
!22 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 1)
!23 = !DILocation(line: 11, column: 11, scope: !24)
!24 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 2)
!25 = !DILocation(line: 11, column: 7, scope: !26)
!26 = !DILexicalBlockFile(scope: !19, file: !1, discriminator: 3)
!27 = !DILocation(line: 0, scope: !22)
!28 = !DILocation(line: 15, column: 1, scope: !19)
!29 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 17, type: !8, scopeLine: 17, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!30 = !DILocation(line: 19, column: 3, scope: !31)
!31 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 2)
!32 = !DILocation(line: 21, column: 16, scope: !31)
!33 = !DILocation(line: 22, column: 14, scope: !34)
!34 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 1)
!35 = !DILocation(line: 22, column: 11, scope: !31)
!36 = !DILocation(line: 23, column: 16, scope: !29)
!37 = !DILocation(line: 23, column: 9, scope: !34)
!38 = !DILocation(line: 23, column: 9, scope: !31)
!39 = !DILocation(line: 24, column: 14, scope: !34)
!40 = !DILocation(line: 24, column: 11, scope: !31)
!41 = !DILocation(line: 25, column: 16, scope: !29)
!42 = !DILocation(line: 25, column: 9, scope: !34)
!43 = !DILocation(line: 25, column: 9, scope: !31)
!44 = !DILocation(line: 21, column: 21, scope: !34)
!45 = !DILocation(line: 27, column: 1, scope: !29)
!46 = !DILocation(line: 19, column: 24, scope: !47)
!47 = !DILexicalBlockFile(scope: !29, file: !1, discriminator: 3)
!48 = !DILocation(line: 19, column: 17, scope: !34)
!49 = distinct !{!49, !50, !51}
!50 = !DILocation(line: 19, column: 3, scope: !29)
!51 = !DILocation(line: 26, column: 3, scope: !29)
!52 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 29, type: !8, scopeLine: 29, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!53 = !DILocation(line: 31, column: 3, scope: !54)
!54 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 2)
!55 = !DILocation(line: 32, column: 5, scope: !52)
!56 = !DILocation(line: 31, column: 30, scope: !57)
!57 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 3)
!58 = !DILocation(line: 31, column: 17, scope: !59)
!59 = !DILexicalBlockFile(scope: !52, file: !1, discriminator: 1)
!60 = distinct !{!60, !61, !62}
!61 = !DILocation(line: 31, column: 3, scope: !52)
!62 = !DILocation(line: 33, column: 3, scope: !52)
!63 = !DILocation(line: 34, column: 1, scope: !52)