[X86] Add Support for Load Hardening to Mitigate Load Value Injection (LVI)

After finding all LVI gadgets (i.e., SOURCE+SINK pairs) in a given function,
the pass minimally inserts LFENCE instructions in such a manner that the
following property is satisfied: for all SOURCE+SINK pairs, all paths in the
CFG from SOURCE to SINK contain at least one LFENCE instruction. The algorithm
that implements this minimal insertion is influenced by an academic paper that
minimally inserts memory fences for high-performance concurrent programs:

http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf

The algorithm implemented in this pass is as follows:

1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the following components:
  - SOURCE instructions (also includes function arguments)
  - SINK instructions
  - Basic block entry points
  - Basic block terminators
  - LFENCE instructions
2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e., gadgets) are already mitigated by existing LFENCEs. If all gadgets have been mitigated, go to step 6.
3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
4. Insert one LFENCE along each CFG edge that was cut in step 3.
5. Go to step 2.
6. If any LFENCEs were inserted, return true from runOnFunction() to tell LLVM that the function was modified.

By default, the heuristic used in Step 3 is a greedy heuristic that avoids
inserting LFENCEs into loops unless absolutely necessary. There is also a
CLI option to load a plugin that can provide even better optimization,
inserting fewer fences, while still mitigating all of the LVI gadgets.
The plugin can be found here: https://github.com/intel/lvi-llvm-optimization-plugin,
and a description of the pass's behavior with the plugin can be found here:
https://software.intel.com/security-software-guidance/insights/optimized-mitigation-approach-load-value-injection.

Differential Revision: https://reviews.llvm.org/D75937
This commit is contained in:
Scott Constable 2020-04-03 13:41:34 -07:00 committed by Craig Topper
parent ba1ffd25c1
commit 62c42e29ba
2 changed files with 379 additions and 5 deletions

View File

@ -9,7 +9,30 @@
/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
/// of a load from memory (i.e., SOURCE), and any operation that may transmit
/// the value loaded from memory over a covert channel, or use the value loaded
/// from memory to determine a branch/call target (i.e., SINK).
/// from memory to determine a branch/call target (i.e., SINK). After finding
/// all such gadgets in a given function, the pass minimally inserts LFENCE
/// instructions in such a manner that the following property is satisfied: for
/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
/// least one LFENCE instruction. The algorithm that implements this minimal
/// insertion is influenced by an academic paper that minimally inserts memory
/// fences for high-performance concurrent programs:
/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
/// The algorithm implemented in this pass is as follows:
/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
/// following components:
/// - SOURCE instructions (also includes function arguments)
/// - SINK instructions
/// - Basic block entry points
/// - Basic block terminators
/// - LFENCE instructions
/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have been
/// mitigated, go to step 6.
/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
/// 5. Go to step 2.
/// 6. If any LFENCEs were inserted, return `true` from runOnFunction() to tell
/// LLVM that the function was modified.
///
//===----------------------------------------------------------------------===//
@ -37,6 +60,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
@ -45,11 +69,16 @@ using namespace llvm;
// PASS_KEY doubles as the debug type and as the common prefix of every
// command-line option registered by this pass (e.g. "x86-lvi-load-opt-plugin").
#define PASS_KEY "x86-lvi-load"
#define DEBUG_TYPE PASS_KEY
// Pass statistics. NumFences and NumFunctionsMitigated are updated at the end
// of runOnMachineFunction() from the number of fences actually inserted.
STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
"were deployed");
STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
// Path of an optional shared library that replaces the built-in greedy
// edge-cutting heuristic. When non-empty, cutEdges() loads the library and
// requires it to export a symbol named "optimize_cut".
static cl::opt<std::string> OptimizePluginPath(
PASS_KEY "-opt-plugin",
cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
static cl::opt<bool> NoConditionalBranches(
PASS_KEY "-no-cbranch",
cl::desc("Don't treat conditional branches as disclosure gadgets. This "
@ -80,6 +109,12 @@ static cl::opt<bool> NoFixedLoads(
"may improve performance, at the cost of security."),
cl::init(false), cl::Hidden);
// Handle of the optimization plugin; loaded lazily by cutEdges() the first
// time it is needed and kept for the rest of the process.
static llvm::sys::DynamicLibrary OptimizeDL{};
// Signature of the plugin entry point ("optimize_cut"). As filled in by
// cutEdges(), the graph is passed in a compressed adjacency layout:
//   nodes[i]       index into `edges` of the first egress edge of node i;
//                  nodes[nodes_size] is a terminator equal to edges_size
//   edges[j]       index of the destination node of edge j
//   edge_values[j] value attached to edge j
//   cut_edges[j]   (out) non-zero if the plugin chose to cut edge j
// The meaning of the int return value is not visible here; cutEdges() ignores
// it. NOTE(review): confirm against the plugin's documentation.
typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
unsigned int *edges, int *edge_values,
int *cut_edges /* out */, unsigned int edges_size);
// Resolved plugin entry point; nullptr until the plugin has been loaded.
static OptimizeCutT OptimizeCut = nullptr;
// Node value used for the node that has no MachineInstr; insertFences() treats
// it as "function arguments" and fences at the start of the entry block.
#define ARG_NODE nullptr
// Edge value that presumably marks a SOURCE->SINK gadget edge rather than a CFG
// edge (the predicate that tests it is outside this view -- TODO confirm).
#define GADGET_EDGE ((int)(-1))
// Maps an integer edge value to a floating-point weight of 2 * value + 1.
#define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1))
@ -139,6 +174,11 @@ private:
getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
const MachineDominatorTree &MDT,
const MachineDominanceFrontier &MDF, bool FixedLoads) const;
/// Takes ownership of \p Graph and returns a graph from which fence nodes,
/// the edges touching them, and already-mitigated gadget edges were removed.
std::unique_ptr<MachineGadgetGraph>
elimEdges(std::unique_ptr<MachineGadgetGraph> Graph) const;
/// Chooses CFG edges of \p G to cut and records them in \p CutEdges, using
/// either the optimization plugin or the built-in greedy heuristic.
void cutEdges(MachineGadgetGraph &G, EdgeSet &CutEdges /* out */) const;
/// Inserts an LFENCE for the edges in \p CutEdges and returns the number of
/// LFENCEs inserted. May add further edges to \p CutEdges.
int insertFences(MachineGadgetGraph &G,
EdgeSet &CutEdges /* in, out */) const;
bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const;
bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const;
@ -241,14 +281,17 @@ bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
TII = STI->getInstrInfo();
TRI = STI->getRegisterInfo();
LLVM_DEBUG(dbgs() << "Hardening data-dependent loads...\n");
hardenLoads(MF, false);
int FencesInserted = hardenLoads(MF, false);
LLVM_DEBUG(dbgs() << "Hardening data-dependent loads... Done\n");
if (!NoFixedLoads) {
LLVM_DEBUG(dbgs() << "Hardening fixed loads...\n");
hardenLoads(MF, true);
FencesInserted += hardenLoads(MF, true);
LLVM_DEBUG(dbgs() << "Hardening fixed loads... Done\n");
}
return false;
if (FencesInserted > 0)
++NumFunctionsMitigated;
NumFences += FencesInserted;
return (FencesInserted > 0);
}
// Apply the mitigation to `MF`, return the number of fences inserted.
@ -256,6 +299,8 @@ bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
// loads; otherwise, mitigation will be applied to non-fixed loads.
int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF,
bool FixedLoads) const {
int FencesInserted = 0;
LLVM_DEBUG(dbgs() << "Building gadget graph...\n");
const auto &MLI = getAnalysis<MachineLoopInfo>();
const auto &MDT = getAnalysis<MachineDominatorTree>();
@ -289,7 +334,27 @@ int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF,
return 0;
}
return 0;
do {
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n");
std::unique_ptr<MachineGadgetGraph> ElimGraph = elimEdges(std::move(Graph));
LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n");
if (ElimGraph->NumGadgets == 0)
break;
EdgeSet CutEdges{*ElimGraph};
LLVM_DEBUG(dbgs() << "Cutting edges...\n");
cutEdges(*ElimGraph, CutEdges);
LLVM_DEBUG(dbgs() << "Cutting edges... Done\n");
LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n");
FencesInserted += insertFences(*ElimGraph, CutEdges);
LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n");
Graph.reset(GraphBuilder::trim(
*ElimGraph, MachineGadgetGraph::NodeSet{*ElimGraph}, CutEdges));
} while (true);
return FencesInserted;
}
std::unique_ptr<X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph>
@ -461,6 +526,213 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
return G;
}
std::unique_ptr<X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph>
X86LoadValueInjectionLoadHardeningPass::elimEdges(
std::unique_ptr<MachineGadgetGraph> Graph) const {
MachineGadgetGraph::NodeSet ElimNodes{*Graph};
MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
if (Graph->NumFences > 0) { // eliminate fences
for (auto EI = Graph->edges_begin(), EE = Graph->edges_end(); EI != EE;
++EI) {
GTraits::NodeRef Dest = GTraits::edge_dest(*EI);
if (isFence(Dest->value())) {
ElimNodes.insert(Dest);
ElimEdges.insert(EI);
std::for_each(
GTraits::child_edge_begin(Dest), GTraits::child_edge_end(Dest),
[&ElimEdges](GTraits::EdgeRef E) { ElimEdges.insert(&E); });
}
}
LLVM_DEBUG(dbgs() << "Eliminated " << ElimNodes.count()
<< " fence nodes\n");
}
// eliminate gadget edges that are mitigated
int NumGadgets = 0;
MachineGadgetGraph::NodeSet Visited{*Graph}, GadgetSinks{*Graph};
MachineGadgetGraph::EdgeSet ElimGadgets{*Graph};
for (auto NI = GTraits::nodes_begin(Graph.get()),
NE = GTraits::nodes_end(Graph.get());
NI != NE; ++NI) {
// collect the gadgets for this node
for (auto EI = GTraits::child_edge_begin(*NI),
EE = GTraits::child_edge_end(*NI);
EI != EE; ++EI) {
if (MachineGadgetGraph::isGadgetEdge(*EI)) {
++NumGadgets;
ElimGadgets.insert(EI);
GadgetSinks.insert(GTraits::edge_dest(*EI));
}
}
if (GadgetSinks.empty())
continue;
std::function<void(GTraits::NodeRef, bool)> TraverseDFS =
[&](GTraits::NodeRef N, bool FirstNode) {
if (!FirstNode) {
Visited.insert(N);
if (GadgetSinks.contains(N)) {
for (auto CEI = GTraits::child_edge_begin(*NI),
CEE = GTraits::child_edge_end(*NI);
CEI != CEE; ++CEI) {
if (MachineGadgetGraph::isGadgetEdge(*CEI) &&
GTraits::edge_dest(*CEI) == N)
ElimGadgets.erase(CEI);
}
}
}
for (auto CEI = GTraits::child_edge_begin(N),
CEE = GTraits::child_edge_end(N);
CEI != CEE; ++CEI) {
GTraits::NodeRef Dest = GTraits::edge_dest(*CEI);
if (MachineGadgetGraph::isCFGEdge(*CEI) &&
!Visited.contains(Dest) && !ElimEdges.contains(CEI))
TraverseDFS(Dest, false);
}
};
TraverseDFS(*NI, true);
Visited.clear();
GadgetSinks.clear();
}
LLVM_DEBUG(dbgs() << "Eliminated " << ElimGadgets.count()
<< " gadget edges\n");
ElimEdges |= ElimGadgets;
if (!(ElimEdges.empty() && ElimNodes.empty())) {
int NumRemainingGadgets = NumGadgets - ElimGadgets.count();
Graph.reset(GraphBuilder::trim(*Graph, ElimNodes, ElimEdges,
0 /* NumFences */, NumRemainingGadgets));
} else {
Graph->NumFences = 0;
Graph->NumGadgets = NumGadgets;
}
return Graph;
}
// Decide which CFG edges of `G` should receive an LFENCE and record them in
// `CutEdges` (out).
//
// If an optimization plugin was requested on the command line, the graph is
// flattened into plain arrays and handed to the plugin's `optimize_cut` entry
// point, which reports its choice through an array of per-edge flags. The
// plugin is loaded on first use; failure to load it, or to find the entry
// point, is a fatal error.
//
// Otherwise a greedy heuristic picks a single edge: the CFG edge with the
// smallest value among the egress edges of SOURCE nodes and the ingress edges
// of SINK nodes whose gadget has already been seen during the scan.
void X86LoadValueInjectionLoadHardeningPass::cutEdges(
    MachineGadgetGraph &G,
    MachineGadgetGraph::EdgeSet &CutEdges /* out */) const {
  if (!OptimizePluginPath.empty()) {
    if (!OptimizeDL.isValid()) {
      std::string ErrorMsg{};
      OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
          OptimizePluginPath.c_str(), &ErrorMsg);
      if (!ErrorMsg.empty())
        report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
      OptimizeCut = reinterpret_cast<OptimizeCutT>(
          OptimizeDL.getAddressOfSymbol("optimize_cut"));
      if (!OptimizeCut)
        report_fatal_error("Invalid optimization plugin");
    }

    // The buffers are owned by unique_ptrs so that they are released on every
    // path out of this scope. make_unique value-initializes array elements,
    // so an edge flag the plugin never writes reads as "not cut".
    auto Nodes = std::make_unique<unsigned int[]>(G.nodes_size() +
                                                  1 /* terminator node */);
    auto Edges = std::make_unique<unsigned int[]>(G.edges_size());
    auto EdgeCuts = std::make_unique<int[]>(G.edges_size());
    auto EdgeValues = std::make_unique<int[]>(G.edges_size());

    // Nodes[i] is the index of node i's first egress edge in Edges[].
    for (auto *NI = G.nodes_begin(), *NE = G.nodes_end(); NI != NE; ++NI) {
      Nodes[std::distance(G.nodes_begin(), NI)] =
          std::distance(G.edges_begin(), GTraits::child_edge_begin(NI));
    }
    Nodes[G.nodes_size()] = G.edges_size(); // terminator node

    // Edges[j] is the index of edge j's destination node.
    for (auto *EI = G.edges_begin(), *EE = G.edges_end(); EI != EE; ++EI) {
      const auto EdgeIdx = std::distance(G.edges_begin(), EI);
      Edges[EdgeIdx] = std::distance(G.nodes_begin(), GTraits::edge_dest(*EI));
      EdgeValues[EdgeIdx] = EI->value();
    }

    OptimizeCut(Nodes.get(), G.nodes_size(), Edges.get(), EdgeValues.get(),
                EdgeCuts.get(), G.edges_size());
    for (int I = 0; I < G.edges_size(); ++I) {
      if (EdgeCuts[I])
        CutEdges.set(I);
    }
  } else { // Use the default greedy heuristic
    // Find the cheapest CFG edge that will eliminate a gadget (by being egress
    // from a SOURCE node or ingress to a SINK node), and cut it.
    MachineGadgetGraph::NodeSet GadgetSinks{G};
    MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
    for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G);
         NI != NE; ++NI) {
      for (auto EI = GTraits::child_edge_begin(*NI),
                EE = GTraits::child_edge_end(*NI);
           EI != EE; ++EI) {
        if (MachineGadgetGraph::isGadgetEdge(*EI)) {
          // NI is a SOURCE node. Look for a cheap egress edge
          for (auto EEI = GTraits::child_edge_begin(*NI); EEI != EE; ++EEI) {
            if (MachineGadgetGraph::isCFGEdge(*EEI)) {
              if (!CheapestSoFar || EEI->value() < CheapestSoFar->value())
                CheapestSoFar = EEI;
            }
          }
          // Remember the SINK so that CFG edges into it, visited later in
          // the scan, are considered as candidates too.
          GadgetSinks.insert(GTraits::edge_dest(*EI));
        } else { // EI is a CFG edge
          if (GadgetSinks.contains(GTraits::edge_dest(*EI))) {
            // The dest is a SINK node. Hence EI is an ingress edge
            if (!CheapestSoFar || EI->value() < CheapestSoFar->value())
              CheapestSoFar = EI;
          }
        }
      }
    }
    assert(CheapestSoFar && "Failed to cut an edge");
    CutEdges.insert(CheapestSoFar);
  }
  LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
}
// Materialize the cut edges in `CutEdges` as LFENCE instructions and return
// the number of LFENCEs inserted.
//
// The fence for a cut edge is placed relative to the edge's origin node:
//   * the argument node      -> at the very start of the entry block
//   * a branch instruction   -> immediately before the branch
//   * any other instruction  -> immediately after the instruction
// No fence is inserted when the instruction on either side of the insertion
// point is already a fence. A fence placed before a branch covers every CFG
// edge leaving that branch, so those edges are added to `CutEdges` as well.
int X86LoadValueInjectionLoadHardeningPass::insertFences(
    MachineGadgetGraph &G, EdgeSet &CutEdges /* in, out */) const {
  int NumInserted = 0;
  int NumExtraCuts = 0;

  // Marks every CFG egress edge of `N` that is not yet cut as cut.
  auto CutRemainingCFGEdges = [&](GTraits::NodeRef N) {
    for (auto OutIt = GTraits::child_edge_begin(N),
              OutEnd = GTraits::child_edge_end(N);
         OutIt != OutEnd; ++OutIt) {
      if (!MachineGadgetGraph::isCFGEdge(*OutIt) || CutEdges.contains(OutIt))
        continue;
      CutEdges.insert(OutIt);
      ++NumExtraCuts;
    }
  };

  for (auto NodeIt = GTraits::nodes_begin(&G), NodeEnd = GTraits::nodes_end(&G);
       NodeIt != NodeEnd; ++NodeIt) {
    for (auto EdgeIt = GTraits::child_edge_begin(*NodeIt),
              EdgeEnd = GTraits::child_edge_end(*NodeIt);
         EdgeIt != EdgeEnd; ++EdgeIt) {
      if (!CutEdges.contains(EdgeIt))
        continue;

      MachineInstr *MI = (*NodeIt)->value();
      MachineInstr *Before = nullptr; // instruction preceding the fence, if any
      MachineBasicBlock *MBB = nullptr;
      MachineBasicBlock::iterator Where; // the fence goes in front of this

      if (MI == ARG_NODE) { // insert LFENCE at beginning of entry block
        MBB = &G.getMF().front();
        Where = MBB->begin();
        Before = nullptr;
      } else if (MI->isBranch()) { // insert the LFENCE before the branch
        MBB = MI->getParent();
        Where = MI;
        Before = MI->getPrevNode();
        CutRemainingCFGEdges(*NodeIt);
      } else { // insert the LFENCE after the instruction
        MBB = MI->getParent();
        if (MachineInstr *Next = MI->getNextNode())
          Where = Next;
        else
          Where = MBB->end();
        if (Where == MBB->end())
          Before = MBB->empty() ? nullptr : &MBB->back();
        else
          Before = Where->getPrevNode();
      }

      // Skip the insertion if a fence already sits right at this spot.
      if (Where != MBB->end() && isFence(&*Where))
        continue;
      if (Before && isFence(Before))
        continue;

      BuildMI(*MBB, Where, DebugLoc(), TII->get(X86::LFENCE));
      ++NumInserted;
    }
  }
  LLVM_DEBUG(dbgs() << "Inserted " << NumInserted << " fences\n");
  LLVM_DEBUG(dbgs() << "Cut an additional " << NumExtraCuts
                    << " edges during fence insertion\n");
  return NumInserted;
}
bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
const MachineInstr &MI, unsigned Reg) const {
if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||

View File

@ -0,0 +1,102 @@
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CBFX
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CB
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-FX
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-BASE
; Checks where the LVI load hardening pass places LFENCEs in a loop whose body
; conditionally performs a chain of dependent loads. Each RUN line selects one
; llc configuration and pairs the common X64 prefix with a specific one
; - X64-CBFX uses the default options
; - X64-CB adds --x86-lvi-load-no-fixed
; - X64-FX adds --x86-lvi-load-no-cbranch
; - X64-BASE adds both options
; Directives with the plain X64 prefix must therefore hold in every
; configuration.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @test(i32** %secret, i32 %secret_size) #0 {
; X64-LABEL: test:
entry:
%secret.addr = alloca i32**, align 8
%secret_size.addr = alloca i32, align 4
%ret_val = alloca i32, align 4
%i = alloca i32, align 4
store i32** %secret, i32*** %secret.addr, align 8
store i32 %secret_size, i32* %secret_size.addr, align 4
store i32 0, i32* %ret_val, align 4
call void @llvm.x86.sse2.lfence()
store i32 0, i32* %i, align 4
br label %for.cond
; The explicit lfence intrinsic call above is expected in the output of every
; configuration.
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: lfence
; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: jmp .LBB0_1
for.cond: ; preds = %for.inc, %entry
%0 = load i32, i32* %i, align 4
%1 = load i32, i32* %secret_size.addr, align 4
%cmp = icmp slt i32 %0, %1
br i1 %cmp, label %for.body, label %for.end
; X64: .LBB0_1: # %for.cond
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-CBFX-NEXT: lfence
; X64-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax
; X64-CBFX-NEXT: lfence
; X64-NEXT: jge .LBB0_5
for.body: ; preds = %for.cond
%2 = load i32, i32* %i, align 4
%rem = srem i32 %2, 2
%cmp1 = icmp eq i32 %rem, 0
br i1 %cmp1, label %if.then, label %if.end
; X64: # %bb.2: # %for.body
; X64-NEXT: # in Loop: Header=BB0_1 Depth=1
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-CBFX-NEXT: lfence
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: shrl $31, %ecx
; X64-NEXT: addl %eax, %ecx
; X64-NEXT: andl $-2, %ecx
; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB0_4
if.then: ; preds = %for.body
%3 = load i32**, i32*** %secret.addr, align 8
%4 = load i32, i32* %ret_val, align 4
%idxprom = sext i32 %4 to i64
%arrayidx = getelementptr inbounds i32*, i32** %3, i64 %idxprom
%5 = load i32*, i32** %arrayidx, align 8
%6 = load i32, i32* %5, align 4
store i32 %6, i32* %ret_val, align 4
br label %if.end
; In this block the lfence between the two dependent loads (the indexed load
; and the load through its result) is expected in every configuration; the
; fences after the two stack loads are expected only when fixed loads are
; hardened.
; X64: # %bb.3: # %if.then
; X64-NEXT: # in Loop: Header=BB0_1 Depth=1
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-CBFX-NEXT: lfence
; X64-FX-NEXT: lfence
; X64-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
; X64-CBFX-NEXT: lfence
; X64-FX-NEXT: lfence
; X64-NEXT: movq (%rax,%rcx,8), %rax
; X64-NEXT: lfence
; X64-NEXT: movl (%rax), %eax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: jmp .LBB0_4
if.end: ; preds = %if.then, %for.body
br label %for.inc
for.inc: ; preds = %if.end
%7 = load i32, i32* %i, align 4
%inc = add nsw i32 %7, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
%8 = load i32, i32* %ret_val, align 4
ret i32 %8
}
; Function Attrs: nounwind
declare void @llvm.x86.sse2.lfence() #1
attributes #0 = { "target-features"="+lvi-load-hardening" }
attributes #1 = { nounwind }