2007-06-02 02:02:12 +08:00
|
|
|
//===--- CGStmt.cpp - Emit LLVM Code from Statements ----------------------===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2007-06-02 02:02:12 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
// This contains code to emit Stmt nodes as LLVM code.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2008-05-08 16:54:20 +08:00
|
|
|
#include "CGDebugInfo.h"
|
2020-06-02 00:39:25 +08:00
|
|
|
#include "CGOpenMPRuntime.h"
|
2019-12-10 08:11:56 +08:00
|
|
|
#include "CodeGenFunction.h"
|
2008-05-08 16:54:20 +08:00
|
|
|
#include "CodeGenModule.h"
|
2011-02-20 07:03:58 +08:00
|
|
|
#include "TargetInfo.h"
|
2019-12-10 08:11:56 +08:00
|
|
|
#include "clang/AST/Attr.h"
|
2021-04-16 07:49:19 +08:00
|
|
|
#include "clang/AST/Expr.h"
|
|
|
|
#include "clang/AST/Stmt.h"
|
2008-08-11 13:00:27 +08:00
|
|
|
#include "clang/AST/StmtVisitor.h"
|
2015-09-10 06:39:06 +08:00
|
|
|
#include "clang/Basic/Builtins.h"
|
2020-10-31 20:07:06 +08:00
|
|
|
#include "clang/Basic/DiagnosticSema.h"
|
2009-03-05 16:04:57 +08:00
|
|
|
#include "clang/Basic/PrettyStackTrace.h"
|
2020-02-28 03:01:58 +08:00
|
|
|
#include "clang/Basic/SourceManager.h"
|
2008-02-06 00:35:33 +08:00
|
|
|
#include "clang/Basic/TargetInfo.h"
|
2020-10-14 14:48:29 +08:00
|
|
|
#include "llvm/ADT/SmallSet.h"
|
2008-02-06 00:35:33 +08:00
|
|
|
#include "llvm/ADT/StringExtras.h"
|
2021-09-29 03:53:55 +08:00
|
|
|
#include "llvm/IR/Assumptions.h"
|
2013-01-02 19:45:17 +08:00
|
|
|
#include "llvm/IR/DataLayout.h"
|
|
|
|
#include "llvm/IR/InlineAsm.h"
|
|
|
|
#include "llvm/IR/Intrinsics.h"
|
2015-09-10 06:39:06 +08:00
|
|
|
#include "llvm/IR/MDBuilder.h"
|
2020-05-22 07:44:30 +08:00
|
|
|
#include "llvm/Support/SaveAndRestore.h"
|
2015-09-10 06:39:06 +08:00
|
|
|
|
2007-06-02 02:02:12 +08:00
|
|
|
using namespace clang;
|
|
|
|
using namespace CodeGen;
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Statement Emission
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2008-11-12 16:21:33 +08:00
|
|
|
void CodeGenFunction::EmitStopPoint(const Stmt *S) {
|
2009-02-13 16:11:52 +08:00
|
|
|
if (CGDebugInfo *DI = getDebugInfo()) {
|
2011-10-14 05:45:18 +08:00
|
|
|
SourceLocation Loc;
|
2018-08-10 05:08:08 +08:00
|
|
|
Loc = S->getBeginLoc();
|
2011-10-14 05:45:18 +08:00
|
|
|
DI->EmitLocation(Builder, Loc);
|
2013-05-03 01:30:20 +08:00
|
|
|
|
2014-01-08 06:05:52 +08:00
|
|
|
LastStopPoint = Loc;
|
2008-11-12 16:21:33 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-06 16:47:18 +08:00
|
|
|
/// EmitStmt - Emit a single statement \p S as LLVM IR, dispatching on its
/// statement class.  \p Attrs carries any statement attributes (e.g. from an
/// enclosing AttributedStmt) that loop emitters consume.
void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
  assert(S && "Null statement?");
  // Let PGO know which statement counters apply from here on.
  PGO.setCurrentStmt(S);

  // These statements have their own debug info handling.
  if (EmitSimpleStmt(S, Attrs))
    return;

  // Check if we are generating unreachable code.
  if (!HaveInsertPoint()) {
    // If so, and the statement doesn't contain a label, then we do not need to
    // generate actual code. This is safe because (1) the current point is
    // unreachable, so we don't need to execute the code, and (2) we've already
    // handled the statements which update internal data structures (like the
    // local variable map) which could be used by subsequent statements.
    if (!ContainsLabel(S)) {
      // Verify that any decl statements were handled as simple, they may be in
      // scope of subsequent reachable statements.
      assert(!isa<DeclStmt>(*S) && "Unexpected DeclStmt!");
      return;
    }

    // Otherwise, make a new block to hold the code.
    EnsureInsertPoint();
  }

  // Generate a stoppoint if we are emitting debug info.
  EmitStopPoint(S);

  // Ignore all OpenMP directives except for simd if OpenMP with Simd is
  // enabled.
  if (getLangOpts().OpenMP && getLangOpts().OpenMPSimd) {
    if (const auto *D = dyn_cast<OMPExecutableDirective>(S)) {
      EmitSimpleOMPExecutableDirective(*D);
      return;
    }
  }

  switch (S->getStmtClass()) {
  // These statement classes are never emitted through this generic path:
  // catch/except/finally bodies are emitted by their enclosing try emitters.
  case Stmt::NoStmtClass:
  case Stmt::CXXCatchStmtClass:
  case Stmt::SEHExceptStmtClass:
  case Stmt::SEHFinallyStmtClass:
  case Stmt::MSDependentExistsStmtClass:
    llvm_unreachable("invalid statement class to emit generically");
  // These were already handled by EmitSimpleStmt above.
  case Stmt::NullStmtClass:
  case Stmt::CompoundStmtClass:
  case Stmt::DeclStmtClass:
  case Stmt::LabelStmtClass:
  case Stmt::AttributedStmtClass:
  case Stmt::GotoStmtClass:
  case Stmt::BreakStmtClass:
  case Stmt::ContinueStmtClass:
  case Stmt::DefaultStmtClass:
  case Stmt::CaseStmtClass:
  case Stmt::SEHLeaveStmtClass:
    llvm_unreachable("should have emitted these statements as simple");

  // Expand to a case label for every concrete expression class, so that any
  // expression used in statement position is emitted for its side effects.
#define STMT(Type, Base)
#define ABSTRACT_STMT(Op)
#define EXPR(Type, Base) \
  case Stmt::Type##Class:
#include "clang/AST/StmtNodes.inc"
  {
    // Remember the block we came in on.
    llvm::BasicBlock *incoming = Builder.GetInsertBlock();
    assert(incoming && "expression emission must have an insertion point");

    EmitIgnoredExpr(cast<Expr>(S));

    llvm::BasicBlock *outgoing = Builder.GetInsertBlock();
    assert(outgoing && "expression emission cleared block!");

    // The expression emitters assume (reasonably!) that the insertion
    // point is always set.  To maintain that, the call-emission code
    // for noreturn functions has to enter a new block with no
    // predecessors.  We want to kill that block and mark the current
    // insertion point unreachable in the common case of a call like
    // "exit();".  Since expression emission doesn't otherwise create
    // blocks with no predecessors, we can just test for that.
    // However, we must be careful not to do this to our incoming
    // block, because *statement* emission does sometimes create
    // reachable blocks which will have no predecessors until later in
    // the function.  This occurs with, e.g., labels that are not
    // reachable by fallthrough.
    if (incoming != outgoing && outgoing->use_empty()) {
      outgoing->eraseFromParent();
      Builder.ClearInsertionPoint();
    }
    break;
  }

  case Stmt::IndirectGotoStmtClass:
    EmitIndirectGotoStmt(cast<IndirectGotoStmt>(*S)); break;

  case Stmt::IfStmtClass:    EmitIfStmt(cast<IfStmt>(*S));                break;
  // Loop emitters take Attrs so loop hints (e.g. #pragma clang loop) apply.
  case Stmt::WhileStmtClass: EmitWhileStmt(cast<WhileStmt>(*S), Attrs);   break;
  case Stmt::DoStmtClass:    EmitDoStmt(cast<DoStmt>(*S), Attrs);         break;
  case Stmt::ForStmtClass:   EmitForStmt(cast<ForStmt>(*S), Attrs);       break;

  case Stmt::ReturnStmtClass: EmitReturnStmt(cast<ReturnStmt>(*S));       break;

  case Stmt::SwitchStmtClass: EmitSwitchStmt(cast<SwitchStmt>(*S));       break;
  case Stmt::GCCAsmStmtClass: // Intentional fall-through.
  case Stmt::MSAsmStmtClass:  EmitAsmStmt(cast<AsmStmt>(*S));             break;
  case Stmt::CoroutineBodyStmtClass:
    EmitCoroutineBody(cast<CoroutineBodyStmt>(*S));
    break;
  case Stmt::CoreturnStmtClass:
    EmitCoreturnStmt(cast<CoreturnStmt>(*S));
    break;
  case Stmt::CapturedStmtClass: {
    const CapturedStmt *CS = cast<CapturedStmt>(S);
    EmitCapturedStmt(*CS, CS->getCapturedRegionKind());
    }
    break;
  case Stmt::ObjCAtTryStmtClass:
    EmitObjCAtTryStmt(cast<ObjCAtTryStmt>(*S));
    break;
  // @catch/@finally are handled inside EmitObjCAtTryStmt, never standalone.
  case Stmt::ObjCAtCatchStmtClass:
    llvm_unreachable(
                    "@catch statements should be handled by EmitObjCAtTryStmt");
  case Stmt::ObjCAtFinallyStmtClass:
    llvm_unreachable(
                  "@finally statements should be handled by EmitObjCAtTryStmt");
  case Stmt::ObjCAtThrowStmtClass:
    EmitObjCAtThrowStmt(cast<ObjCAtThrowStmt>(*S));
    break;
  case Stmt::ObjCAtSynchronizedStmtClass:
    EmitObjCAtSynchronizedStmt(cast<ObjCAtSynchronizedStmt>(*S));
    break;
  case Stmt::ObjCForCollectionStmtClass:
    EmitObjCForCollectionStmt(cast<ObjCForCollectionStmt>(*S));
    break;
  case Stmt::ObjCAutoreleasePoolStmtClass:
    EmitObjCAutoreleasePoolStmt(cast<ObjCAutoreleasePoolStmt>(*S));
    break;

  case Stmt::CXXTryStmtClass:
    EmitCXXTryStmt(cast<CXXTryStmt>(*S));
    break;
  case Stmt::CXXForRangeStmtClass:
    EmitCXXForRangeStmt(cast<CXXForRangeStmt>(*S), Attrs);
    break;
  case Stmt::SEHTryStmtClass:
    EmitSEHTryStmt(cast<SEHTryStmt>(*S));
    break;
  // OpenMP directives: one emitter per directive class; directives that are
  // not implemented yet deliberately trap via llvm_unreachable below.
  case Stmt::OMPMetaDirectiveClass:
    EmitOMPMetaDirective(cast<OMPMetaDirective>(*S));
    break;
  case Stmt::OMPCanonicalLoopClass:
    EmitOMPCanonicalLoop(cast<OMPCanonicalLoop>(S));
    break;
  case Stmt::OMPParallelDirectiveClass:
    EmitOMPParallelDirective(cast<OMPParallelDirective>(*S));
    break;
  case Stmt::OMPSimdDirectiveClass:
    EmitOMPSimdDirective(cast<OMPSimdDirective>(*S));
    break;
  case Stmt::OMPTileDirectiveClass:
    EmitOMPTileDirective(cast<OMPTileDirective>(*S));
    break;
  case Stmt::OMPUnrollDirectiveClass:
    EmitOMPUnrollDirective(cast<OMPUnrollDirective>(*S));
    break;
  case Stmt::OMPForDirectiveClass:
    EmitOMPForDirective(cast<OMPForDirective>(*S));
    break;
  case Stmt::OMPForSimdDirectiveClass:
    EmitOMPForSimdDirective(cast<OMPForSimdDirective>(*S));
    break;
  case Stmt::OMPSectionsDirectiveClass:
    EmitOMPSectionsDirective(cast<OMPSectionsDirective>(*S));
    break;
  case Stmt::OMPSectionDirectiveClass:
    EmitOMPSectionDirective(cast<OMPSectionDirective>(*S));
    break;
  case Stmt::OMPSingleDirectiveClass:
    EmitOMPSingleDirective(cast<OMPSingleDirective>(*S));
    break;
  case Stmt::OMPMasterDirectiveClass:
    EmitOMPMasterDirective(cast<OMPMasterDirective>(*S));
    break;
  case Stmt::OMPCriticalDirectiveClass:
    EmitOMPCriticalDirective(cast<OMPCriticalDirective>(*S));
    break;
  case Stmt::OMPParallelForDirectiveClass:
    EmitOMPParallelForDirective(cast<OMPParallelForDirective>(*S));
    break;
  case Stmt::OMPParallelForSimdDirectiveClass:
    EmitOMPParallelForSimdDirective(cast<OMPParallelForSimdDirective>(*S));
    break;
  case Stmt::OMPParallelMasterDirectiveClass:
    EmitOMPParallelMasterDirective(cast<OMPParallelMasterDirective>(*S));
    break;
  case Stmt::OMPParallelSectionsDirectiveClass:
    EmitOMPParallelSectionsDirective(cast<OMPParallelSectionsDirective>(*S));
    break;
  case Stmt::OMPTaskDirectiveClass:
    EmitOMPTaskDirective(cast<OMPTaskDirective>(*S));
    break;
  case Stmt::OMPTaskyieldDirectiveClass:
    EmitOMPTaskyieldDirective(cast<OMPTaskyieldDirective>(*S));
    break;
  case Stmt::OMPBarrierDirectiveClass:
    EmitOMPBarrierDirective(cast<OMPBarrierDirective>(*S));
    break;
  case Stmt::OMPTaskwaitDirectiveClass:
    EmitOMPTaskwaitDirective(cast<OMPTaskwaitDirective>(*S));
    break;
  case Stmt::OMPTaskgroupDirectiveClass:
    EmitOMPTaskgroupDirective(cast<OMPTaskgroupDirective>(*S));
    break;
  case Stmt::OMPFlushDirectiveClass:
    EmitOMPFlushDirective(cast<OMPFlushDirective>(*S));
    break;
  case Stmt::OMPDepobjDirectiveClass:
    EmitOMPDepobjDirective(cast<OMPDepobjDirective>(*S));
    break;
  case Stmt::OMPScanDirectiveClass:
    EmitOMPScanDirective(cast<OMPScanDirective>(*S));
    break;
  case Stmt::OMPOrderedDirectiveClass:
    EmitOMPOrderedDirective(cast<OMPOrderedDirective>(*S));
    break;
  case Stmt::OMPAtomicDirectiveClass:
    EmitOMPAtomicDirective(cast<OMPAtomicDirective>(*S));
    break;
  case Stmt::OMPTargetDirectiveClass:
    EmitOMPTargetDirective(cast<OMPTargetDirective>(*S));
    break;
  case Stmt::OMPTeamsDirectiveClass:
    EmitOMPTeamsDirective(cast<OMPTeamsDirective>(*S));
    break;
  case Stmt::OMPCancellationPointDirectiveClass:
    EmitOMPCancellationPointDirective(cast<OMPCancellationPointDirective>(*S));
    break;
  case Stmt::OMPCancelDirectiveClass:
    EmitOMPCancelDirective(cast<OMPCancelDirective>(*S));
    break;
  case Stmt::OMPTargetDataDirectiveClass:
    EmitOMPTargetDataDirective(cast<OMPTargetDataDirective>(*S));
    break;
  case Stmt::OMPTargetEnterDataDirectiveClass:
    EmitOMPTargetEnterDataDirective(cast<OMPTargetEnterDataDirective>(*S));
    break;
  case Stmt::OMPTargetExitDataDirectiveClass:
    EmitOMPTargetExitDataDirective(cast<OMPTargetExitDataDirective>(*S));
    break;
  case Stmt::OMPTargetParallelDirectiveClass:
    EmitOMPTargetParallelDirective(cast<OMPTargetParallelDirective>(*S));
    break;
  case Stmt::OMPTargetParallelForDirectiveClass:
    EmitOMPTargetParallelForDirective(cast<OMPTargetParallelForDirective>(*S));
    break;
  case Stmt::OMPTaskLoopDirectiveClass:
    EmitOMPTaskLoopDirective(cast<OMPTaskLoopDirective>(*S));
    break;
  case Stmt::OMPTaskLoopSimdDirectiveClass:
    EmitOMPTaskLoopSimdDirective(cast<OMPTaskLoopSimdDirective>(*S));
    break;
  case Stmt::OMPMasterTaskLoopDirectiveClass:
    EmitOMPMasterTaskLoopDirective(cast<OMPMasterTaskLoopDirective>(*S));
    break;
  case Stmt::OMPMaskedTaskLoopDirectiveClass:
    llvm_unreachable("masked taskloop directive not supported yet.");
    break;
  case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
    EmitOMPMasterTaskLoopSimdDirective(
        cast<OMPMasterTaskLoopSimdDirective>(*S));
    break;
  case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
    EmitOMPParallelMasterTaskLoopDirective(
        cast<OMPParallelMasterTaskLoopDirective>(*S));
    break;
  case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
    EmitOMPParallelMasterTaskLoopSimdDirective(
        cast<OMPParallelMasterTaskLoopSimdDirective>(*S));
    break;
  case Stmt::OMPDistributeDirectiveClass:
    EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S));
    break;
  case Stmt::OMPTargetUpdateDirectiveClass:
    EmitOMPTargetUpdateDirective(cast<OMPTargetUpdateDirective>(*S));
    break;
  case Stmt::OMPDistributeParallelForDirectiveClass:
    EmitOMPDistributeParallelForDirective(
        cast<OMPDistributeParallelForDirective>(*S));
    break;
  case Stmt::OMPDistributeParallelForSimdDirectiveClass:
    EmitOMPDistributeParallelForSimdDirective(
        cast<OMPDistributeParallelForSimdDirective>(*S));
    break;
  case Stmt::OMPDistributeSimdDirectiveClass:
    EmitOMPDistributeSimdDirective(cast<OMPDistributeSimdDirective>(*S));
    break;
  case Stmt::OMPTargetParallelForSimdDirectiveClass:
    EmitOMPTargetParallelForSimdDirective(
        cast<OMPTargetParallelForSimdDirective>(*S));
    break;
  case Stmt::OMPTargetSimdDirectiveClass:
    EmitOMPTargetSimdDirective(cast<OMPTargetSimdDirective>(*S));
    break;
  case Stmt::OMPTeamsDistributeDirectiveClass:
    EmitOMPTeamsDistributeDirective(cast<OMPTeamsDistributeDirective>(*S));
    break;
  case Stmt::OMPTeamsDistributeSimdDirectiveClass:
    EmitOMPTeamsDistributeSimdDirective(
        cast<OMPTeamsDistributeSimdDirective>(*S));
    break;
  case Stmt::OMPTeamsDistributeParallelForSimdDirectiveClass:
    EmitOMPTeamsDistributeParallelForSimdDirective(
        cast<OMPTeamsDistributeParallelForSimdDirective>(*S));
    break;
  case Stmt::OMPTeamsDistributeParallelForDirectiveClass:
    EmitOMPTeamsDistributeParallelForDirective(
        cast<OMPTeamsDistributeParallelForDirective>(*S));
    break;
  case Stmt::OMPTargetTeamsDirectiveClass:
    EmitOMPTargetTeamsDirective(cast<OMPTargetTeamsDirective>(*S));
    break;
  case Stmt::OMPTargetTeamsDistributeDirectiveClass:
    EmitOMPTargetTeamsDistributeDirective(
        cast<OMPTargetTeamsDistributeDirective>(*S));
    break;
  case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
    EmitOMPTargetTeamsDistributeParallelForDirective(
        cast<OMPTargetTeamsDistributeParallelForDirective>(*S));
    break;
  case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
    EmitOMPTargetTeamsDistributeParallelForSimdDirective(
        cast<OMPTargetTeamsDistributeParallelForSimdDirective>(*S));
    break;
  case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
    EmitOMPTargetTeamsDistributeSimdDirective(
        cast<OMPTargetTeamsDistributeSimdDirective>(*S));
    break;
  case Stmt::OMPInteropDirectiveClass:
    EmitOMPInteropDirective(cast<OMPInteropDirective>(*S));
    break;
  case Stmt::OMPDispatchDirectiveClass:
    llvm_unreachable("Dispatch directive not supported yet.");
    break;
  case Stmt::OMPMaskedDirectiveClass:
    EmitOMPMaskedDirective(cast<OMPMaskedDirective>(*S));
    break;
  case Stmt::OMPGenericLoopDirectiveClass:
    EmitOMPGenericLoopDirective(cast<OMPGenericLoopDirective>(*S));
    break;
  case Stmt::OMPTeamsGenericLoopDirectiveClass:
    llvm_unreachable("teams loop directive not supported yet.");
    break;
  case Stmt::OMPTargetTeamsGenericLoopDirectiveClass:
    llvm_unreachable("target teams loop directive not supported yet.");
    break;
  case Stmt::OMPParallelGenericLoopDirectiveClass:
    llvm_unreachable("parallel loop directive not supported yet.");
    break;
  case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
    llvm_unreachable("target parallel loop directive not supported yet.");
    break;
  case Stmt::OMPParallelMaskedDirectiveClass:
    llvm_unreachable("parallel masked directive not supported yet.");
    break;
  }
}
|
|
|
|
|
2020-10-18 19:34:41 +08:00
|
|
|
bool CodeGenFunction::EmitSimpleStmt(const Stmt *S,
|
|
|
|
ArrayRef<const Attr *> Attrs) {
|
2008-11-12 16:21:33 +08:00
|
|
|
switch (S->getStmtClass()) {
|
2020-10-18 19:34:41 +08:00
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
case Stmt::NullStmtClass:
|
|
|
|
break;
|
|
|
|
case Stmt::CompoundStmtClass:
|
|
|
|
EmitCompoundStmt(cast<CompoundStmt>(*S));
|
|
|
|
break;
|
|
|
|
case Stmt::DeclStmtClass:
|
|
|
|
EmitDeclStmt(cast<DeclStmt>(*S));
|
|
|
|
break;
|
|
|
|
case Stmt::LabelStmtClass:
|
|
|
|
EmitLabelStmt(cast<LabelStmt>(*S));
|
|
|
|
break;
|
2012-04-14 08:33:13 +08:00
|
|
|
case Stmt::AttributedStmtClass:
|
2020-10-18 19:34:41 +08:00
|
|
|
EmitAttributedStmt(cast<AttributedStmt>(*S));
|
|
|
|
break;
|
|
|
|
case Stmt::GotoStmtClass:
|
|
|
|
EmitGotoStmt(cast<GotoStmt>(*S));
|
|
|
|
break;
|
|
|
|
case Stmt::BreakStmtClass:
|
|
|
|
EmitBreakStmt(cast<BreakStmt>(*S));
|
|
|
|
break;
|
|
|
|
case Stmt::ContinueStmtClass:
|
|
|
|
EmitContinueStmt(cast<ContinueStmt>(*S));
|
|
|
|
break;
|
|
|
|
case Stmt::DefaultStmtClass:
|
|
|
|
EmitDefaultStmt(cast<DefaultStmt>(*S), Attrs);
|
|
|
|
break;
|
|
|
|
case Stmt::CaseStmtClass:
|
|
|
|
EmitCaseStmt(cast<CaseStmt>(*S), Attrs);
|
|
|
|
break;
|
|
|
|
case Stmt::SEHLeaveStmtClass:
|
|
|
|
EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S));
|
|
|
|
break;
|
2008-11-12 16:21:33 +08:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2007-09-01 06:09:40 +08:00
|
|
|
/// EmitCompoundStmt - Emit a compound statement {..} node.  If GetLast is true,
/// this captures the expression result of the last sub-statement and returns it
/// (for use by the statement expression extension).
Address CodeGenFunction::EmitCompoundStmt(const CompoundStmt &S, bool GetLast,
                                          AggValueSlot AggSlot) {
  // Push a pretty-stack-trace entry so crashes while emitting this compound
  // statement report its source location.
  PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),S.getLBracLoc(),
                "LLVM IR generation of compound statement ('{}')");

  // Keep track of the current cleanup stack depth, including debug scopes.
  // Scope is an RAII object: its destructor pops cleanups/debug scopes after
  // the statements have been emitted by the call below.
  LexicalScope Scope(*this, S.getSourceRange());

  return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot);
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
/// Emit the statements of a CompoundStmt without introducing a new
/// cleanup/debug scope (the caller is responsible for any scoping).
/// When \p GetLast is true the statement is the body of a statement
/// expression: the value of its result expression is materialized and its
/// address returned. Aggregate results go into \p AggSlot; scalar/complex
/// results are spilled to a temporary because cleanups at the end of the
/// statement expression may run before the caller reads the value.
Address
CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S,
                                              bool GetLast,
                                              AggValueSlot AggSlot) {

  const Stmt *ExprResult = S.getStmtExprResult();
  assert((!GetLast || (GetLast && ExprResult)) &&
         "If GetLast is true then the CompoundStmt must have a StmtExprResult");

  Address RetAlloca = Address::invalid();

  for (auto *CurStmt : S.body()) {
    if (GetLast && ExprResult == CurStmt) {
      // We have to special case labels here. They are statements, but when put
      // at the end of a statement expression, they yield the value of their
      // subexpression. Handle this by walking through all labels we encounter,
      // emitting them before we evaluate the subexpr.
      // Similar issues arise for attributed statements.
      while (!isa<Expr>(ExprResult)) {
        if (const auto *LS = dyn_cast<LabelStmt>(ExprResult)) {
          EmitLabel(LS->getDecl());
          ExprResult = LS->getSubStmt();
        } else if (const auto *AS = dyn_cast<AttributedStmt>(ExprResult)) {
          // FIXME: Update this if we ever have attributes that affect the
          // semantics of an expression.
          ExprResult = AS->getSubStmt();
        } else {
          llvm_unreachable("unknown value statement");
        }
      }

      EnsureInsertPoint();

      const Expr *E = cast<Expr>(ExprResult);
      QualType ExprTy = E->getType();
      if (hasAggregateEvaluationKind(ExprTy)) {
        EmitAggExpr(E, AggSlot);
      } else {
        // We can't return an RValue here because there might be cleanups at
        // the end of the StmtExpr. Because of that, we have to emit the result
        // here into a temporary alloca.
        RetAlloca = CreateMemTemp(ExprTy);
        EmitAnyExprToMem(E, RetAlloca, Qualifiers(),
                         /*IsInit*/ false);
      }
    } else {
      EmitStmt(CurStmt);
    }
  }

  return RetAlloca;
}
|
|
|
|
|
2009-04-01 12:37:47 +08:00
|
|
|
/// If \p BB is an empty block whose only contents are an unconditional
/// branch, forward its uses to the branch target and delete it. This is a
/// cheap local CFG cleanup; it bails out conservatively whenever the block
/// might be referenced by cleanup bookkeeping.
void CodeGenFunction::SimplifyForwardingBlocks(llvm::BasicBlock *BB) {
  llvm::BranchInst *BI = dyn_cast<llvm::BranchInst>(BB->getTerminator());

  // If there is a cleanup stack, then it isn't worth trying to
  // simplify this block (we would need to remove it from the scope map
  // and cleanup entry).
  if (!EHStack.empty())
    return;

  // Can only simplify direct branches.
  if (!BI || !BI->isUnconditional())
    return;

  // Can only simplify empty blocks.
  if (BI->getIterator() != BB->begin())
    return;

  BB->replaceAllUsesWith(BI->getSuccessor(0));
  BI->eraseFromParent();
  BB->eraseFromParent();
}
|
|
|
|
|
2008-11-13 09:24:05 +08:00
|
|
|
/// Terminate the current block by branching to \p BB, insert \p BB into the
/// current function, and make it the new insertion point. If \p IsFinished
/// is true and the block ends up with no uses, it is deleted instead of
/// being inserted.
void CodeGenFunction::EmitBlock(llvm::BasicBlock *BB, bool IsFinished) {
  llvm::BasicBlock *CurBB = Builder.GetInsertBlock();

  // Fall out of the current block (if necessary).
  EmitBranch(BB);

  // A finished block that nothing branches to is dead; drop it entirely.
  if (IsFinished && BB->use_empty()) {
    delete BB;
    return;
  }

  // Place the block after the current block, if possible, or else at
  // the end of the function.
  if (CurBB && CurBB->getParent())
    CurFn->getBasicBlockList().insertAfter(CurBB->getIterator(), BB);
  else
    CurFn->getBasicBlockList().push_back(BB);
  Builder.SetInsertPoint(BB);
}
|
|
|
|
|
|
|
|
/// Emit an unconditional branch from the current insertion block to
/// \p Target, unless the current block is absent or already terminated.
/// In all cases the builder's insertion point is cleared afterwards.
void CodeGenFunction::EmitBranch(llvm::BasicBlock *Target) {
  // Emit a branch from the current block to the target one if this
  // was a real block. If this was just a fall-through block after a
  // terminator, don't emit it.
  llvm::BasicBlock *CurBB = Builder.GetInsertBlock();

  if (!CurBB || CurBB->getTerminator()) {
    // If there is no insert point or the previous block is already
    // terminated, don't touch it.
  } else {
    // Otherwise, create a fall-through branch.
    Builder.CreateBr(Target);
  }

  Builder.ClearInsertionPoint();
}
|
|
|
|
|
2011-08-11 10:22:43 +08:00
|
|
|
/// Insert \p block into the current function immediately after the block
/// containing the first instruction that uses it (so it reads in source
/// order near its use), or at the end of the function if it has no
/// instruction users yet, and make it the insertion point.
void CodeGenFunction::EmitBlockAfterUses(llvm::BasicBlock *block) {
  bool inserted = false;
  for (llvm::User *u : block->users()) {
    if (llvm::Instruction *insn = dyn_cast<llvm::Instruction>(u)) {
      CurFn->getBasicBlockList().insertAfter(insn->getParent()->getIterator(),
                                             block);
      inserted = true;
      break;
    }
  }

  if (!inserted)
    CurFn->getBasicBlockList().push_back(block);

  Builder.SetInsertPoint(block);
}
|
|
|
|
|
2010-07-06 09:34:17 +08:00
|
|
|
/// Return the JumpDest for the given label, creating (but not inserting)
/// its basic block on first use. Forward references get an invalid scope
/// depth, which EmitLabel fixes up when the label is actually emitted.
CodeGenFunction::JumpDest
CodeGenFunction::getJumpDestForLabel(const LabelDecl *D) {
  JumpDest &Dest = LabelMap[D];
  if (Dest.isValid()) return Dest;

  // Create, but don't insert, the new block.
  Dest = JumpDest(createBasicBlock(D->getName()),
                  EHScopeStack::stable_iterator::invalid(),
                  NextCleanupDestIndex++);
  return Dest;
}
|
|
|
|
|
2011-02-17 15:39:24 +08:00
|
|
|
void CodeGenFunction::EmitLabel(const LabelDecl *D) {
|
2013-03-23 14:43:35 +08:00
|
|
|
// Add this label to the current lexical scope if we're within any
|
|
|
|
// normal cleanups. Jumps "in" to this label --- when permitted by
|
|
|
|
// the language --- may need to be routed around such cleanups.
|
|
|
|
if (EHStack.hasNormalCleanups() && CurLexicalScope)
|
|
|
|
CurLexicalScope->addLabel(D);
|
|
|
|
|
2011-02-17 15:39:24 +08:00
|
|
|
JumpDest &Dest = LabelMap[D];
|
2010-07-06 09:34:17 +08:00
|
|
|
|
2010-07-24 05:56:41 +08:00
|
|
|
// If we didn't need a forward reference to this label, just go
|
2010-07-06 09:34:17 +08:00
|
|
|
// ahead and create a destination at the current scope.
|
2010-07-24 05:56:41 +08:00
|
|
|
if (!Dest.isValid()) {
|
2011-02-17 15:39:24 +08:00
|
|
|
Dest = getJumpDestInCurrentScope(D->getName());
|
2010-07-06 09:34:17 +08:00
|
|
|
|
|
|
|
// Otherwise, we need to give this label a target depth and remove
|
|
|
|
// it from the branch-fixups list.
|
|
|
|
} else {
|
2010-07-24 05:56:41 +08:00
|
|
|
assert(!Dest.getScopeDepth().isValid() && "already emitted label!");
|
2013-03-23 14:43:35 +08:00
|
|
|
Dest.setScopeDepth(EHStack.stable_begin());
|
2010-07-24 05:56:41 +08:00
|
|
|
ResolveBranchFixups(Dest.getBlock());
|
2010-07-06 09:34:17 +08:00
|
|
|
}
|
|
|
|
|
2010-07-24 05:56:41 +08:00
|
|
|
EmitBlock(Dest.getBlock());
|
2019-01-24 13:34:29 +08:00
|
|
|
|
|
|
|
// Emit debug info for labels.
|
|
|
|
if (CGDebugInfo *DI = getDebugInfo()) {
|
2020-01-14 07:54:54 +08:00
|
|
|
if (CGM.getCodeGenOpts().hasReducedDebugInfo()) {
|
2019-01-24 13:34:29 +08:00
|
|
|
DI->setLocation(D->getLocation());
|
|
|
|
DI->EmitLabel(D, Builder);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(D->getStmt());
|
2008-07-27 04:23:23 +08:00
|
|
|
}
|
|
|
|
|
2013-03-23 14:43:35 +08:00
|
|
|
/// Change the cleanup scope of the labels in this lexical scope to
|
|
|
|
/// match the scope of the enclosing context.
|
|
|
|
void CodeGenFunction::LexicalScope::rescopeLabels() {
|
|
|
|
assert(!Labels.empty());
|
|
|
|
EHScopeStack::stable_iterator innermostScope
|
|
|
|
= CGF.EHStack.getInnermostNormalCleanup();
|
|
|
|
|
|
|
|
// Change the scope depth of all the labels.
|
|
|
|
for (SmallVectorImpl<const LabelDecl*>::const_iterator
|
|
|
|
i = Labels.begin(), e = Labels.end(); i != e; ++i) {
|
|
|
|
assert(CGF.LabelMap.count(*i));
|
|
|
|
JumpDest &dest = CGF.LabelMap.find(*i)->second;
|
|
|
|
assert(dest.getScopeDepth().isValid());
|
|
|
|
assert(innermostScope.encloses(dest.getScopeDepth()));
|
|
|
|
dest.setScopeDepth(innermostScope);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reparent the labels if the new scope also has cleanups.
|
|
|
|
if (innermostScope != EHScopeStack::stable_end() && ParentScope) {
|
|
|
|
ParentScope->Labels.append(Labels.begin(), Labels.end());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-07-27 04:23:23 +08:00
|
|
|
|
|
|
|
void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) {
|
2011-02-17 15:39:24 +08:00
|
|
|
EmitLabel(S.getDecl());
|
[Windows SEH]: HARDWARE EXCEPTION HANDLING (MSVC -EHa) - Part 1
This patch is the Part-1 (FE Clang) implementation of HW Exception handling.
This new feature adds the support of Hardware Exception for Microsoft Windows
SEH (Structured Exception Handling).
This is the first step of this project; only X86_64 target is enabled in this patch.
Compiler options:
For clang-cl.exe, the option is -EHa, the same as MSVC.
For clang.exe, the extra option is -fasync-exceptions,
plus -triple x86_64-windows -fexceptions and -fcxx-exceptions as usual.
NOTE:: Without the -EHa or -fasync-exceptions, this patch is a NO-DIFF change.
The rules for C code:
For C-code, one way (MSVC approach) to achieve SEH -EHa semantic is to follow
three rules:
* First, no exception can move in or out of _try region., i.e., no "potential
faulty instruction can be moved across _try boundary.
* Second, the order of exceptions for instructions 'directly' under a _try
must be preserved (not applied to those in callees).
* Finally, global states (local/global/heap variables) that can be read
outside of _try region must be updated in memory (not just in register)
before the subsequent exception occurs.
The impact to C++ code:
Although SEH is a feature for C code, -EHa does have a profound effect on C++
side. When a C++ function (in the same compilation unit with option -EHa ) is
called by a SEH C function, a hardware exception occurs in C++ code can also
be handled properly by an upstream SEH _try-handler or a C++ catch(...).
As such, when that happens in the middle of an object's life scope, the dtor
must be invoked the same way as C++ Synchronous Exception during unwinding
process.
Design:
A natural way to achieve the rules above in LLVM today is to allow an EH edge
added on memory/computation instruction (previous iload/istore idea) so that
exception path is modeled in Flow graph preciously. However, tracking every
single memory instruction and potential faulty instruction can create many
Invokes, complicate flow graph and possibly result in negative performance
impact for downstream optimization and code generation. Making all
optimizations be aware of the new semantic is also substantial.
This design does not intend to model exception path at instruction level.
Instead, the proposed design tracks and reports EH state at BLOCK-level to
reduce the complexity of flow graph and minimize the performance-impact on CPP
code under -EHa option.
One key element of this design is the ability to compute State number at
block-level. Our algorithm is based on the following rationales:
A _try scope is always a SEME (Single Entry Multiple Exits) region as jumping
into a _try is not allowed. The single entry must start with a seh_try_begin()
invoke with a correct State number that is the initial state of the SEME.
Through control-flow, state number is propagated into all blocks. Side exits
marked by seh_try_end() will unwind to parent state based on existing
SEHUnwindMap[].
Note side exits can ONLY jump into parent scopes (lower state number).
Thus, when a block succeeds various states from its predecessors, the lowest
State triumphs others. If some exits flow to unreachable, propagation on those
paths terminate, not affecting remaining blocks.
For CPP code, object lifetime region is usually a SEME as SEH _try.
However there is one rare exception: jumping into a lifetime that has Dtor but
has no Ctor is warned, but allowed:
Warning: jump bypasses variable with a non-trivial destructor
In that case, the region is actually a MEME (multiple entry multiple exits).
Our solution is to inject a eha_scope_begin() invoke in the side entry block to
ensure a correct State.
Implementation:
Part-1: Clang implementation described below.
Two intrinsic are created to track CPP object scopes; eha_scope_begin() and eha_scope_end().
_scope_begin() is immediately added after ctor() is called and EHStack is pushed.
So it must be an invoke, not a call. With that it's also guaranteed an
EH-cleanup-pad is created regardless whether there exists a call in this scope.
_scope_end is added before dtor(). These two intrinsics make the computation of
Block-State possible in downstream code gen pass, even in the presence of
ctor/dtor inlining.
Two intrinsic, seh_try_begin() and seh_try_end(), are added for C-code to mark
_try boundary and to prevent from exceptions being moved across _try boundary.
All memory instructions inside a _try are considered as 'volatile' to assure
2nd and 3rd rules for C-code above. This is a little sub-optimized. But it's
acceptable as the amount of code directly under _try is very small.
Part-2 (will be in Part-2 patch): LLVM implementation described below.
For both C++ & C-code, the state of each block is computed at the same place in
BE (WinEHPreparing pass) where all other EH tables/maps are calculated.
In addition to _scope_begin & _scope_end, the computation of block state also
rely on the existing State tracking code (UnwindMap and InvokeStateMap).
For both C++ & C-code, the state of each block with potential trap instruction
is marked and reported in DAG Instruction Selection pass, the same place where
the state for -EHsc (synchronous exceptions) is done.
If the first instruction in a reported block scope can trap, a Nop is injected
before this instruction. This nop is needed to accommodate LLVM Windows EH
implementation, in which the address in IPToState table is offset by +1.
(note the purpose of that is to ensure the return address of a call is in the
same scope as the call address.
The handler for catch(...) for -EHa must handle HW exception. So it is
'adjective' flag is reset (it cannot be IsStdDotDot (0x40) that only catches
C++ exceptions).
Suppress push/popTerminate() scope (from noexcept/noTHrow) so that HW
exceptions can be passed through.
Original llvm-dev [RFC] discussions can be found in these two threads below:
https://lists.llvm.org/pipermail/llvm-dev/2020-March/140541.html
https://lists.llvm.org/pipermail/llvm-dev/2020-April/141338.html
Differential Revision: https://reviews.llvm.org/D80344/new/
2021-05-18 13:06:32 +08:00
|
|
|
|
|
|
|
// IsEHa - emit eha.scope.begin if it's a side entry of a scope
|
|
|
|
if (getLangOpts().EHAsynch && S.isSideEntry())
|
|
|
|
EmitSehCppScopeBegin();
|
|
|
|
|
2007-06-02 02:02:12 +08:00
|
|
|
EmitStmt(S.getSubStmt());
|
|
|
|
}
|
|
|
|
|
2012-04-14 08:33:13 +08:00
|
|
|
void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
|
2020-05-22 07:44:30 +08:00
|
|
|
bool nomerge = false;
|
2022-03-01 04:20:59 +08:00
|
|
|
bool noinline = false;
|
2022-03-15 04:44:59 +08:00
|
|
|
bool alwaysinline = false;
|
2021-04-16 07:49:19 +08:00
|
|
|
const CallExpr *musttail = nullptr;
|
|
|
|
|
|
|
|
for (const auto *A : S.getAttrs()) {
|
2022-03-15 04:44:59 +08:00
|
|
|
switch (A->getKind()) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case attr::NoMerge:
|
2020-05-22 07:44:30 +08:00
|
|
|
nomerge = true;
|
2022-03-15 04:44:59 +08:00
|
|
|
break;
|
|
|
|
case attr::NoInline:
|
2022-03-01 04:20:59 +08:00
|
|
|
noinline = true;
|
2022-03-15 04:44:59 +08:00
|
|
|
break;
|
|
|
|
case attr::AlwaysInline:
|
|
|
|
alwaysinline = true;
|
|
|
|
break;
|
|
|
|
case attr::MustTail:
|
2021-04-16 07:49:19 +08:00
|
|
|
const Stmt *Sub = S.getSubStmt();
|
|
|
|
const ReturnStmt *R = cast<ReturnStmt>(Sub);
|
|
|
|
musttail = cast<CallExpr>(R->getRetValue()->IgnoreParens());
|
2022-03-15 04:44:59 +08:00
|
|
|
break;
|
2021-04-16 07:49:19 +08:00
|
|
|
}
|
|
|
|
}
|
2020-05-22 07:44:30 +08:00
|
|
|
SaveAndRestore<bool> save_nomerge(InNoMergeAttributedStmt, nomerge);
|
2022-03-01 04:20:59 +08:00
|
|
|
SaveAndRestore<bool> save_noinline(InNoInlineAttributedStmt, noinline);
|
2022-03-15 04:44:59 +08:00
|
|
|
SaveAndRestore<bool> save_alwaysinline(InAlwaysInlineAttributedStmt,
|
|
|
|
alwaysinline);
|
2021-04-16 07:49:19 +08:00
|
|
|
SaveAndRestore<const CallExpr *> save_musttail(MustTailCall, musttail);
|
2017-09-06 16:47:18 +08:00
|
|
|
EmitStmt(S.getSubStmt(), S.getAttrs());
|
2012-04-14 08:33:13 +08:00
|
|
|
}
|
|
|
|
|
2007-06-02 02:02:12 +08:00
|
|
|
void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) {
|
2008-11-12 16:21:33 +08:00
|
|
|
// If this code is reachable then emit a stop point (if generating
|
|
|
|
// debug info). We have to do this ourselves because we are on the
|
|
|
|
// "simple" statement path.
|
|
|
|
if (HaveInsertPoint())
|
|
|
|
EmitStopPoint(&S);
|
2009-02-07 20:52:26 +08:00
|
|
|
|
2010-07-06 09:34:17 +08:00
|
|
|
EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel()));
|
2007-06-02 02:02:12 +08:00
|
|
|
}
|
|
|
|
|
2009-10-13 14:55:33 +08:00
|
|
|
|
2008-08-05 00:51:22 +08:00
|
|
|
void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
|
2011-02-17 15:39:24 +08:00
|
|
|
if (const LabelDecl *Target = S.getConstantTarget()) {
|
2010-10-28 16:53:48 +08:00
|
|
|
EmitBranchThroughCleanup(getJumpDestForLabel(Target));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2009-11-07 02:10:47 +08:00
|
|
|
// Ensure that we have an i8* for our PHI node.
|
2009-10-29 07:59:40 +08:00
|
|
|
llvm::Value *V = Builder.CreateBitCast(EmitScalarExpr(S.getTarget()),
|
2011-02-08 16:22:06 +08:00
|
|
|
Int8PtrTy, "addr");
|
2009-10-13 14:55:33 +08:00
|
|
|
llvm::BasicBlock *CurBB = Builder.GetInsertBlock();
|
|
|
|
|
|
|
|
// Get the basic block for the indirect goto.
|
|
|
|
llvm::BasicBlock *IndGotoBB = GetIndirectGotoBlock();
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2009-10-13 14:55:33 +08:00
|
|
|
// The first instruction in the block has to be the PHI for the switch dest,
|
|
|
|
// add an entry for this branch.
|
|
|
|
cast<llvm::PHINode>(IndGotoBB->begin())->addIncoming(V, CurBB);
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2009-10-13 14:55:33 +08:00
|
|
|
EmitBranch(IndGotoBB);
|
2008-08-05 00:51:22 +08:00
|
|
|
}
|
|
|
|
|
2008-11-11 15:24:28 +08:00
|
|
|
void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
|
2021-10-05 20:02:53 +08:00
|
|
|
// The else branch of a consteval if statement is always the only branch that
|
|
|
|
// can be runtime evaluated.
|
|
|
|
if (S.isConsteval()) {
|
|
|
|
const Stmt *Executed = S.isNegatedConsteval() ? S.getThen() : S.getElse();
|
|
|
|
if (Executed) {
|
|
|
|
RunCleanupsScope ExecutedScope(*this);
|
|
|
|
EmitStmt(Executed);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2007-06-02 02:02:12 +08:00
|
|
|
// C99 6.8.4.1: The first substatement is executed if the expression compares
|
|
|
|
// unequal to 0. The condition must be a scalar type.
|
2014-05-29 03:10:59 +08:00
|
|
|
LexicalScope ConditionScope(*this, S.getCond()->getSourceRange());
|
2013-06-08 08:16:55 +08:00
|
|
|
|
2016-07-14 08:11:03 +08:00
|
|
|
if (S.getInit())
|
|
|
|
EmitStmt(S.getInit());
|
|
|
|
|
2009-11-24 07:44:04 +08:00
|
|
|
if (S.getConditionVariable())
|
2018-03-18 05:01:27 +08:00
|
|
|
EmitDecl(*S.getConditionVariable());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
Make emission of 'if' conditions much more sophisticated when we
have a condition that is an &&/||. Before we used to compile things like this:
int test() {
if (x && y) foo(); else bar();
}
into:
%0 = load i32* @x ; <i32> [#uses=1]
%1 = icmp ne i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %land_rhs, label %land_cont
land_rhs: ; preds = %entry
%2 = load i32* @y ; <i32> [#uses=1]
%3 = icmp ne i32 %2, 0 ; <i1> [#uses=1]
br label %land_cont
land_cont: ; preds = %land_rhs, %entry
%4 = phi i1 [ false, %entry ], [ %3, %land_rhs ] ; <i1> [#uses=1]
br i1 %4, label %ifthen, label %ifelse
ifthen: ; preds = %land_cont
%call = call i32 (...)* @foo() ; <i32> [#uses=0]
br label %ifend
ifelse: ; preds = %land_cont
%call1 = call i32 (...)* @bar() ; <i32> [#uses=0]
br label %ifend
ifend: ; preds = %ifelse, %ifthen
Now we turn it into the much more svelte code:
%0 = load i32* @x ; <i32> [#uses=1]
%1 = icmp ne i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %land_lhs_true, label %ifelse
land_lhs_true: ; preds = %entry
%2 = load i32* @y ; <i32> [#uses=1]
%3 = icmp ne i32 %2, 0 ; <i1> [#uses=1]
br i1 %3, label %ifthen, label %ifelse
ifthen: ; preds = %land_lhs_true
%call = call i32 (...)* @foo() ; <i32> [#uses=0]
br label %ifend
ifelse: ; preds = %land_lhs_true, %entry
%call1 = call i32 (...)* @bar() ; <i32> [#uses=0]
br label %ifend
ifend: ; preds = %ifelse, %ifthen
Note the lack of a phi node.
This shrinks the -O0 .ll file for 176.gcc/expr.c from 43176 to 40267 lines.
llvm-svn: 59111
2008-11-12 15:46:33 +08:00
|
|
|
// If the condition constant folds and can be elided, try to avoid emitting
|
|
|
|
// the condition and the dead arm of the if/else.
|
2011-02-28 07:02:32 +08:00
|
|
|
bool CondConstant;
|
2016-06-24 03:16:49 +08:00
|
|
|
if (ConstantFoldsToSimpleInteger(S.getCond(), CondConstant,
|
|
|
|
S.isConstexpr())) {
|
2008-11-11 15:24:28 +08:00
|
|
|
// Figure out which block (then or else) is executed.
|
2011-02-28 07:02:32 +08:00
|
|
|
const Stmt *Executed = S.getThen();
|
|
|
|
const Stmt *Skipped = S.getElse();
|
|
|
|
if (!CondConstant) // Condition false?
|
2008-11-11 15:24:28 +08:00
|
|
|
std::swap(Executed, Skipped);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-11-11 15:24:28 +08:00
|
|
|
// If the skipped block has no labels in it, just emit the executed block.
|
|
|
|
// This avoids emitting dead code and simplifies the CFG substantially.
|
2016-06-24 03:16:49 +08:00
|
|
|
if (S.isConstexpr() || !ContainsLabel(Skipped)) {
|
2014-01-07 06:27:43 +08:00
|
|
|
if (CondConstant)
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(&S);
|
2009-11-25 00:43:22 +08:00
|
|
|
if (Executed) {
|
2010-07-06 09:34:17 +08:00
|
|
|
RunCleanupsScope ExecutedScope(*this);
|
2008-11-11 15:24:28 +08:00
|
|
|
EmitStmt(Executed);
|
2009-11-25 00:43:22 +08:00
|
|
|
}
|
2008-11-11 15:24:28 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
Make emission of 'if' conditions much more sophisticated when we
have a condition that is an &&/||. Before we used to compile things like this:
int test() {
if (x && y) foo(); else bar();
}
into:
%0 = load i32* @x ; <i32> [#uses=1]
%1 = icmp ne i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %land_rhs, label %land_cont
land_rhs: ; preds = %entry
%2 = load i32* @y ; <i32> [#uses=1]
%3 = icmp ne i32 %2, 0 ; <i1> [#uses=1]
br label %land_cont
land_cont: ; preds = %land_rhs, %entry
%4 = phi i1 [ false, %entry ], [ %3, %land_rhs ] ; <i1> [#uses=1]
br i1 %4, label %ifthen, label %ifelse
ifthen: ; preds = %land_cont
%call = call i32 (...)* @foo() ; <i32> [#uses=0]
br label %ifend
ifelse: ; preds = %land_cont
%call1 = call i32 (...)* @bar() ; <i32> [#uses=0]
br label %ifend
ifend: ; preds = %ifelse, %ifthen
Now we turn it into the much more svelte code:
%0 = load i32* @x ; <i32> [#uses=1]
%1 = icmp ne i32 %0, 0 ; <i1> [#uses=1]
br i1 %1, label %land_lhs_true, label %ifelse
land_lhs_true: ; preds = %entry
%2 = load i32* @y ; <i32> [#uses=1]
%3 = icmp ne i32 %2, 0 ; <i1> [#uses=1]
br i1 %3, label %ifthen, label %ifelse
ifthen: ; preds = %land_lhs_true
%call = call i32 (...)* @foo() ; <i32> [#uses=0]
br label %ifend
ifelse: ; preds = %land_lhs_true, %entry
%call1 = call i32 (...)* @bar() ; <i32> [#uses=0]
br label %ifend
ifend: ; preds = %ifelse, %ifthen
Note the lack of a phi node.
This shrinks the -O0 .ll file for 176.gcc/expr.c from 43176 to 40267 lines.
llvm-svn: 59111
2008-11-12 15:46:33 +08:00
|
|
|
|
|
|
|
// Otherwise, the condition did not fold, or we couldn't elide it. Just emit
|
|
|
|
// the conditional branch.
|
2008-11-13 08:47:57 +08:00
|
|
|
llvm::BasicBlock *ThenBlock = createBasicBlock("if.then");
|
|
|
|
llvm::BasicBlock *ContBlock = createBasicBlock("if.end");
|
|
|
|
llvm::BasicBlock *ElseBlock = ContBlock;
|
2007-06-02 02:02:12 +08:00
|
|
|
if (S.getElse())
|
2008-11-13 08:47:57 +08:00
|
|
|
ElseBlock = createBasicBlock("if.else");
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2020-09-10 01:12:32 +08:00
|
|
|
// Prefer the PGO based weights over the likelihood attribute.
|
|
|
|
// When the build isn't optimized the metadata isn't used, so don't generate
|
|
|
|
// it.
|
2020-10-04 20:21:00 +08:00
|
|
|
Stmt::Likelihood LH = Stmt::LH_None;
|
2020-09-10 01:12:32 +08:00
|
|
|
uint64_t Count = getProfileCount(S.getThen());
|
2020-10-04 20:21:00 +08:00
|
|
|
if (!Count && CGM.getCodeGenOpts().OptimizationLevel)
|
|
|
|
LH = Stmt::getLikelihood(S.getThen(), S.getElse());
|
|
|
|
EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Count, LH);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2007-06-02 02:02:12 +08:00
|
|
|
// Emit the 'then' code.
|
2014-01-07 06:27:43 +08:00
|
|
|
EmitBlock(ThenBlock);
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(&S);
|
2009-11-25 00:43:22 +08:00
|
|
|
{
|
2010-07-06 09:34:17 +08:00
|
|
|
RunCleanupsScope ThenScope(*this);
|
2009-11-25 00:43:22 +08:00
|
|
|
EmitStmt(S.getThen());
|
|
|
|
}
|
2016-07-22 07:28:18 +08:00
|
|
|
EmitBranch(ContBlock);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2007-06-02 02:02:12 +08:00
|
|
|
// Emit the 'else' code if present.
|
|
|
|
if (const Stmt *Else = S.getElse()) {
|
2014-07-11 04:42:59 +08:00
|
|
|
{
|
2015-02-04 02:40:42 +08:00
|
|
|
// There is no need to emit line number for an unconditional branch.
|
2015-02-04 04:00:54 +08:00
|
|
|
auto NL = ApplyDebugLocation::CreateEmpty(*this);
|
2014-07-11 04:42:59 +08:00
|
|
|
EmitBlock(ElseBlock);
|
|
|
|
}
|
2009-11-25 00:43:22 +08:00
|
|
|
{
|
2010-07-06 09:34:17 +08:00
|
|
|
RunCleanupsScope ElseScope(*this);
|
2009-11-25 00:43:22 +08:00
|
|
|
EmitStmt(Else);
|
|
|
|
}
|
2014-07-11 04:42:59 +08:00
|
|
|
{
|
2015-02-04 02:40:42 +08:00
|
|
|
// There is no need to emit line number for an unconditional branch.
|
2015-02-04 04:00:54 +08:00
|
|
|
auto NL = ApplyDebugLocation::CreateEmpty(*this);
|
2014-07-11 04:42:59 +08:00
|
|
|
EmitBranch(ContBlock);
|
|
|
|
}
|
2007-06-02 02:02:12 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2007-06-02 02:02:12 +08:00
|
|
|
// Emit the continuation block for code after the if.
|
2008-11-13 09:54:24 +08:00
|
|
|
EmitBlock(ContBlock, true);
|
2007-06-02 02:02:12 +08:00
|
|
|
}
|
|
|
|
|
2014-06-06 20:40:24 +08:00
|
|
|
/// Emit IR for a while-statement.
///
/// Layout: a header block ("while.cond") that re-evaluates the controlling
/// expression before every iteration (C99 6.8.5.1), the body block
/// ("while.body"), and an exit block ("while.end"). The header is the
/// 'continue' target and the exit block is the 'break' target. When the
/// condition folds to the constant 'true', the conditional branch is omitted
/// entirely and the now-trivial header may be erased at the end.
void CodeGenFunction::EmitWhileStmt(const WhileStmt &S,
                                    ArrayRef<const Attr *> WhileAttrs) {
  // Emit the header for the loop, which will also become
  // the continue target.
  JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond");
  EmitBlock(LoopHeader.getBlock());

  // Create an exit block for when the condition fails, which will
  // also become the break target.
  JumpDest LoopExit = getJumpDestInCurrentScope("while.end");

  // Store the blocks to use for break and continue.
  BreakContinueStack.push_back(BreakContinue(LoopExit, LoopHeader));

  // C++ [stmt.while]p2:
  //   When the condition of a while statement is a declaration, the
  //   scope of the variable that is declared extends from its point
  //   of declaration (3.3.2) to the end of the while statement.
  //   [...]
  //   The object created in a condition is destroyed and created
  //   with each iteration of the loop.
  //
  // The scope is entered inside the header block, so the condition
  // variable is re-created (and destroyed, below) on every iteration.
  RunCleanupsScope ConditionScope(*this);

  if (S.getConditionVariable())
    EmitDecl(*S.getConditionVariable());

  // Evaluate the conditional in the while header.  C99 6.8.5.1: The
  // evaluation of the controlling expression takes place before each
  // execution of the loop body.
  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());

  // while(1) is common, avoid extra exit blocks.  Be sure
  // to correctly handle break/continue though.
  llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal);
  bool CondIsConstInt = C != nullptr;
  bool EmitBoolCondBranch = !CondIsConstInt || !C->isOne();
  const SourceRange &R = S.getSourceRange();
  LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), CGM.getCodeGenOpts(),
                 WhileAttrs, SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()),
                 checkIfLoopMustProgress(CondIsConstInt));

  // As long as the condition is true, go to the loop body.
  llvm::BasicBlock *LoopBody = createBasicBlock("while.body");
  if (EmitBoolCondBranch) {
    llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
    // If the condition variable needs cleanups, the false edge cannot jump
    // straight to the exit: stage the exit through a dedicated block that
    // runs the cleanups first.
    if (ConditionScope.requiresCleanups())
      ExitBlock = createBasicBlock("while.exit");
    // Prefer PGO-based branch weights; if there are none and we are
    // optimizing, lower a [[likely]]/[[unlikely]] hint on the body via
    // llvm.expect instead.
    llvm::MDNode *Weights =
        createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()));
    if (!Weights && CGM.getCodeGenOpts().OptimizationLevel)
      BoolCondVal = emitCondLikelihoodViaExpectIntrinsic(
          BoolCondVal, Stmt::getLikelihood(S.getBody()));
    Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, Weights);

    if (ExitBlock != LoopExit.getBlock()) {
      EmitBlock(ExitBlock);
      EmitBranchThroughCleanup(LoopExit);
    }
  } else if (const Attr *A = Stmt::getLikelihoodAttr(S.getBody())) {
    // No branch was emitted (condition is constant true), so a likelihood
    // attribute on the body can have no effect; warn instead of silently
    // dropping it.
    CGM.getDiags().Report(A->getLocation(),
                          diag::warn_attribute_has_no_effect_on_infinite_loop)
        << A << A->getRange();
    CGM.getDiags().Report(
        S.getWhileLoc(),
        diag::note_attribute_has_no_effect_on_infinite_loop_here)
        << SourceRange(S.getWhileLoc(), S.getRParenLoc());
  }

  // Emit the loop body.  We have to emit this in a cleanup scope
  // because it might be a singleton DeclStmt.
  {
    RunCleanupsScope BodyScope(*this);
    EmitBlock(LoopBody);
    incrementProfileCounter(&S);
    EmitStmt(S.getBody());
  }

  BreakContinueStack.pop_back();

  // Immediately force cleanup.
  ConditionScope.ForceCleanup();

  EmitStopPoint(&S);
  // Branch to the loop header again.
  EmitBranch(LoopHeader.getBlock());

  LoopStack.pop();

  // Emit the exit block.
  EmitBlock(LoopExit.getBlock(), true);

  // The LoopHeader typically is just a branch if we skipped emitting
  // a branch, try to erase it.
  if (!EmitBoolCondBranch)
    SimplifyForwardingBlocks(LoopHeader.getBlock());
}
|
|
|
|
|
2014-06-06 20:40:24 +08:00
|
|
|
void CodeGenFunction::EmitDoStmt(const DoStmt &S,
|
2014-08-27 14:28:16 +08:00
|
|
|
ArrayRef<const Attr *> DoAttrs) {
|
2010-07-06 09:34:17 +08:00
|
|
|
JumpDest LoopExit = getJumpDestInCurrentScope("do.end");
|
|
|
|
JumpDest LoopCond = getJumpDestInCurrentScope("do.cond");
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2015-04-24 07:06:47 +08:00
|
|
|
uint64_t ParentCount = getCurrentProfileCount();
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2007-07-17 05:28:45 +08:00
|
|
|
// Store the blocks to use for break and continue.
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(LoopExit, LoopCond));
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-07-06 09:34:17 +08:00
|
|
|
// Emit the body of the loop.
|
|
|
|
llvm::BasicBlock *LoopBody = createBasicBlock("do.body");
|
2014-05-22 16:54:05 +08:00
|
|
|
|
2015-04-24 07:06:47 +08:00
|
|
|
EmitBlockWithFallThrough(LoopBody, &S);
|
2010-07-06 09:34:17 +08:00
|
|
|
{
|
|
|
|
RunCleanupsScope BodyScope(*this);
|
|
|
|
EmitStmt(S.getBody());
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-07-24 05:56:41 +08:00
|
|
|
EmitBlock(LoopCond.getBlock());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2007-06-06 04:53:16 +08:00
|
|
|
// C99 6.8.5.2: "The evaluation of the controlling expression takes place
|
|
|
|
// after each execution of the loop body."
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2007-06-06 04:53:16 +08:00
|
|
|
// Evaluate the conditional in the while header.
|
|
|
|
// C99 6.8.5p2/p4: The first substatement is executed if the expression
|
|
|
|
// compares unequal to 0. The condition must be a scalar type.
|
|
|
|
llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
|
2007-10-10 04:33:39 +08:00
|
|
|
|
2014-01-23 23:05:00 +08:00
|
|
|
BreakContinueStack.pop_back();
|
|
|
|
|
2007-10-10 04:33:39 +08:00
|
|
|
// "do {} while (0)" is common in macros, avoid extra blocks. Be sure
|
|
|
|
// to correctly handle break/continue though.
|
2021-04-30 21:13:47 +08:00
|
|
|
llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal);
|
|
|
|
bool CondIsConstInt = C;
|
|
|
|
bool EmitBoolCondBranch = !C || !C->isZero();
|
2020-11-03 05:03:21 +08:00
|
|
|
|
|
|
|
const SourceRange &R = S.getSourceRange();
|
|
|
|
LoopStack.push(LoopBody, CGM.getContext(), CGM.getCodeGenOpts(), DoAttrs,
|
|
|
|
SourceLocToDebugLoc(R.getBegin()),
|
2021-04-30 21:13:47 +08:00
|
|
|
SourceLocToDebugLoc(R.getEnd()),
|
|
|
|
checkIfLoopMustProgress(CondIsConstInt));
|
2007-10-10 04:33:39 +08:00
|
|
|
|
2007-06-06 04:53:16 +08:00
|
|
|
// As long as the condition is true, iterate the loop.
|
2014-06-06 20:40:24 +08:00
|
|
|
if (EmitBoolCondBranch) {
|
2015-04-24 07:06:47 +08:00
|
|
|
uint64_t BackedgeCount = getProfileCount(S.getBody()) - ParentCount;
|
2015-07-28 04:10:20 +08:00
|
|
|
Builder.CreateCondBr(
|
2015-05-02 13:00:55 +08:00
|
|
|
BoolCondVal, LoopBody, LoopExit.getBlock(),
|
|
|
|
createProfileWeightsForLoop(S.getCond(), BackedgeCount));
|
2014-06-06 20:40:24 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-05-22 16:54:05 +08:00
|
|
|
LoopStack.pop();
|
|
|
|
|
2007-06-06 04:53:16 +08:00
|
|
|
// Emit the exit block.
|
2010-07-24 05:56:41 +08:00
|
|
|
EmitBlock(LoopExit.getBlock());
|
2007-10-10 04:33:39 +08:00
|
|
|
|
2009-04-01 12:37:47 +08:00
|
|
|
// The DoCond block typically is just a branch if we skipped
|
|
|
|
// emitting a branch, try to erase it.
|
|
|
|
if (!EmitBoolCondBranch)
|
2010-07-24 05:56:41 +08:00
|
|
|
SimplifyForwardingBlocks(LoopCond.getBlock());
|
2007-06-06 04:53:16 +08:00
|
|
|
}
|
|
|
|
|
2014-06-06 20:40:24 +08:00
|
|
|
/// Emit IR for a for-statement.
///
/// Layout: the init statement (if any) is emitted straight-line, followed by
/// a condition block ("for.cond") that heads the loop, the body ("for.body"),
/// and an optional increment block ("for.inc"). The 'break' target is the
/// exit block ("for.end"); the 'continue' target is the increment block when
/// an increment exists, otherwise the condition block itself. A for-statement
/// with no condition falls straight into the body, treated as constant true.
void CodeGenFunction::EmitForStmt(const ForStmt &S,
                                  ArrayRef<const Attr *> ForAttrs) {
  JumpDest LoopExit = getJumpDestInCurrentScope("for.end");

  // Scope for the init-statement's declarations; lives for the whole loop.
  LexicalScope ForScope(*this, S.getSourceRange());

  // Evaluate the first part before the loop.
  if (S.getInit())
    EmitStmt(S.getInit());

  // Start the loop with a block that tests the condition.
  // If there's an increment, the continue scope will be overwritten
  // later.
  JumpDest CondDest = getJumpDestInCurrentScope("for.cond");
  llvm::BasicBlock *CondBlock = CondDest.getBlock();
  EmitBlock(CondBlock);

  // A missing condition counts as a constant (true); otherwise ask the
  // constant evaluator whether the condition folds to an integer.
  Expr::EvalResult Result;
  bool CondIsConstInt =
      !S.getCond() || S.getCond()->EvaluateAsInt(Result, getContext());

  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
                 SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()),
                 checkIfLoopMustProgress(CondIsConstInt));

  // Create a cleanup scope for the condition variable cleanups.
  LexicalScope ConditionScope(*this, S.getSourceRange());

  // If the for loop doesn't have an increment we can just use the condition as
  // the continue block.  Otherwise, if there is no condition variable, we can
  // form the continue block now.  If there is a condition variable, we can't
  // form the continue block until after we've emitted the condition, because
  // the condition is in scope in the increment, but Sema's jump diagnostics
  // ensure that there are no continues from the condition variable that jump
  // to the loop increment.
  JumpDest Continue;
  if (!S.getInc())
    Continue = CondDest;
  else if (!S.getConditionVariable())
    Continue = getJumpDestInCurrentScope("for.inc");
  // Note: when a condition variable exists, Continue is pushed here still
  // unset and patched below after the variable is emitted.
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  if (S.getCond()) {
    // If the for statement has a condition scope, emit the local variable
    // declaration.
    if (S.getConditionVariable()) {
      EmitDecl(*S.getConditionVariable());

      // We have entered the condition variable's scope, so we're now able to
      // jump to the continue block.
      Continue = S.getInc() ? getJumpDestInCurrentScope("for.inc") : CondDest;
      BreakContinueStack.back().ContinueBlock = Continue;
    }

    llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
    // If there are any cleanups between here and the loop-exit scope,
    // create a block to stage a loop exit along.
    if (ForScope.requiresCleanups())
      ExitBlock = createBasicBlock("for.cond.cleanup");

    // As long as the condition is true, iterate the loop.
    llvm::BasicBlock *ForBody = createBasicBlock("for.body");

    // C99 6.8.5p2/p4: The first substatement is executed if the expression
    // compares unequal to 0.  The condition must be a scalar type.
    llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
    // Prefer PGO-based branch weights; if there are none and we are
    // optimizing, lower a likelihood hint on the body via llvm.expect.
    llvm::MDNode *Weights =
        createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()));
    if (!Weights && CGM.getCodeGenOpts().OptimizationLevel)
      BoolCondVal = emitCondLikelihoodViaExpectIntrinsic(
          BoolCondVal, Stmt::getLikelihood(S.getBody()));

    Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights);

    if (ExitBlock != LoopExit.getBlock()) {
      EmitBlock(ExitBlock);
      EmitBranchThroughCleanup(LoopExit);
    }

    EmitBlock(ForBody);
  } else {
    // Treat it as a non-zero constant.  Don't even create a new block for the
    // body, just fall into it.
  }
  incrementProfileCounter(&S);

  {
    // Create a separate cleanup scope for the body, in case it is not
    // a compound statement.
    RunCleanupsScope BodyScope(*this);
    EmitStmt(S.getBody());
  }

  // If there is an increment, emit it next.
  if (S.getInc()) {
    EmitBlock(Continue.getBlock());
    EmitStmt(S.getInc());
  }

  BreakContinueStack.pop_back();

  // Run the condition variable's cleanups before branching back, so they
  // execute on every iteration.
  ConditionScope.ForceCleanup();

  EmitStopPoint(&S);
  EmitBranch(CondBlock);

  ForScope.ForceCleanup();

  LoopStack.pop();

  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock(), true);
}
|
2007-06-05 11:59:43 +08:00
|
|
|
|
2014-06-06 20:40:24 +08:00
|
|
|
/// Emit IR for a C++ range-based for-statement.
///
/// The desugared range, begin, and end statements are emitted straight-line
/// before the loop, then the usual for-loop shape follows: a condition block
/// ("for.cond"), the body ("for.body", which also emits the loop variable),
/// and an increment block ("for.inc") that is the 'continue' target. Unlike
/// EmitForStmt, the condition, increment, and loop variable always exist
/// here, so there is no constant-condition or missing-increment special case.
void
CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S,
                                     ArrayRef<const Attr *> ForAttrs) {
  JumpDest LoopExit = getJumpDestInCurrentScope("for.end");

  // Scope covering the range/begin/end variables; lives for the whole loop.
  LexicalScope ForScope(*this, S.getSourceRange());

  // Evaluate the first pieces before the loop.
  if (S.getInit())
    EmitStmt(S.getInit());
  EmitStmt(S.getRangeStmt());
  EmitStmt(S.getBeginStmt());
  EmitStmt(S.getEndStmt());

  // Start the loop with a block that tests the condition.
  // If there's an increment, the continue scope will be overwritten
  // later.
  llvm::BasicBlock *CondBlock = createBasicBlock("for.cond");
  EmitBlock(CondBlock);

  const SourceRange &R = S.getSourceRange();
  LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs,
                 SourceLocToDebugLoc(R.getBegin()),
                 SourceLocToDebugLoc(R.getEnd()));

  // If there are any cleanups between here and the loop-exit scope,
  // create a block to stage a loop exit along.
  llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
  if (ForScope.requiresCleanups())
    ExitBlock = createBasicBlock("for.cond.cleanup");

  // The loop body, consisting of the specified body and the loop variable.
  llvm::BasicBlock *ForBody = createBasicBlock("for.body");

  // The body is executed if the expression, contextually converted
  // to bool, is true.
  llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond());
  // Prefer PGO-based branch weights; if there are none and we are
  // optimizing, lower a likelihood hint on the body via llvm.expect.
  llvm::MDNode *Weights =
      createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()));
  if (!Weights && CGM.getCodeGenOpts().OptimizationLevel)
    BoolCondVal = emitCondLikelihoodViaExpectIntrinsic(
        BoolCondVal, Stmt::getLikelihood(S.getBody()));
  Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights);

  if (ExitBlock != LoopExit.getBlock()) {
    EmitBlock(ExitBlock);
    EmitBranchThroughCleanup(LoopExit);
  }

  EmitBlock(ForBody);
  incrementProfileCounter(&S);

  // Create a block for the increment. In case of a 'continue', we jump there.
  JumpDest Continue = getJumpDestInCurrentScope("for.inc");

  // Store the blocks to use for break and continue.
  BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));

  {
    // Create a separate cleanup scope for the loop variable and body.
    LexicalScope BodyScope(*this, S.getSourceRange());
    EmitStmt(S.getLoopVarStmt());
    EmitStmt(S.getBody());
  }

  EmitStopPoint(&S);
  // If there is an increment, emit it next.
  EmitBlock(Continue.getBlock());
  EmitStmt(S.getInc());

  BreakContinueStack.pop_back();

  EmitBranch(CondBlock);

  ForScope.ForceCleanup();

  LoopStack.pop();

  // Emit the fall-through block.
  EmitBlock(LoopExit.getBlock(), true);
}
|
|
|
|
|
2008-09-24 12:00:38 +08:00
|
|
|
void CodeGenFunction::EmitReturnOfRValue(RValue RV, QualType Ty) {
  // Store the computed result into the function's return slot, dispatching
  // on which of the three RValue flavors we were handed (exactly one of
  // isScalar/isComplex/isAggregate holds), then branch to the return block
  // through any pending cleanups.
  if (RV.isScalar()) {
    Builder.CreateStore(RV.getScalarVal(), ReturnValue);
  } else if (RV.isComplex()) {
    EmitStoreOfComplex(RV.getComplexVal(), MakeAddrLValue(ReturnValue, Ty),
                       /*init*/ true);
  } else {
    // Aggregate: copy from the value's memory into the return slot.
    LValue Dst = MakeAddrLValue(ReturnValue, Ty);
    LValue Src = MakeAddrLValue(RV.getAggregateAddress(), Ty);
    EmitAggregateCopy(Dst, Src, Ty, getOverlapForReturnValue());
  }
  EmitBranchThroughCleanup(ReturnBlock);
}
|
|
|
|
|
2020-06-04 07:41:50 +08:00
|
|
|
namespace {
// RAII struct used to save and restore a return statement's result expression.
//
// On construction it records the current value of CGF.RetExpr and installs
// the given expression; the destructor puts the old value back, so nested
// emissions that enter and leave a return statement keep RetExpr consistent.
struct SaveRetExprRAII {
  // Save CGF.RetExpr and replace it with \p RetExpr for the lifetime of
  // this object.
  SaveRetExprRAII(const Expr *RetExpr, CodeGenFunction &CGF)
      : OldRetExpr(CGF.RetExpr), CGF(CGF) {
    CGF.RetExpr = RetExpr;
  }
  // Restore the previously recorded expression on scope exit.
  ~SaveRetExprRAII() { CGF.RetExpr = OldRetExpr; }
  const Expr *OldRetExpr; // Value of CGF.RetExpr before this scope was entered.
  CodeGenFunction &CGF;   // Function whose RetExpr field is being managed.
};
} // namespace
|
|
|
|
|
2021-06-23 05:44:29 +08:00
|
|
|
/// If we have 'return f(...);', where both caller and callee are SwiftAsync,
|
|
|
|
/// codegen it as 'tail call ...; ret void;'.
|
|
|
|
static void makeTailCallIfSwiftAsync(const CallExpr *CE, CGBuilderTy &Builder,
|
|
|
|
const CGFunctionInfo *CurFnInfo) {
|
|
|
|
auto calleeQualType = CE->getCallee()->getType();
|
|
|
|
const FunctionType *calleeType = nullptr;
|
|
|
|
if (calleeQualType->isFunctionPointerType() ||
|
|
|
|
calleeQualType->isFunctionReferenceType() ||
|
|
|
|
calleeQualType->isBlockPointerType() ||
|
|
|
|
calleeQualType->isMemberFunctionPointerType()) {
|
|
|
|
calleeType = calleeQualType->getPointeeType()->castAs<FunctionType>();
|
|
|
|
} else if (auto *ty = dyn_cast<FunctionType>(calleeQualType)) {
|
|
|
|
calleeType = ty;
|
|
|
|
} else if (auto CMCE = dyn_cast<CXXMemberCallExpr>(CE)) {
|
|
|
|
if (auto methodDecl = CMCE->getMethodDecl()) {
|
|
|
|
// getMethodDecl() doesn't handle member pointers at the moment.
|
|
|
|
calleeType = methodDecl->getType()->castAs<FunctionType>();
|
|
|
|
} else {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (calleeType->getCallConv() == CallingConv::CC_SwiftAsync &&
|
|
|
|
(CurFnInfo->getASTCallingConvention() == CallingConv::CC_SwiftAsync)) {
|
|
|
|
auto CI = cast<llvm::CallInst>(&Builder.GetInsertBlock()->back());
|
|
|
|
CI->setTailCallKind(llvm::CallInst::TCK_MustTail);
|
|
|
|
Builder.CreateRetVoid();
|
|
|
|
Builder.ClearInsertionPoint();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-06-02 11:19:07 +08:00
|
|
|
/// EmitReturnStmt - Note that due to GCC extensions, this can have an operand
/// if the function returns void, or may be missing one if the function returns
/// non-void.  Fun stuff :).
void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) {
  // When a sanitizer return-value check is required, record this return's
  // source location in a private global and store its address into the
  // function's return-location slot so the check can report it.
  if (requiresReturnValueCheck()) {
    llvm::Constant *SLoc = EmitCheckSourceLocation(S.getBeginLoc());
    auto *SLocPtr =
        new llvm::GlobalVariable(CGM.getModule(), SLoc->getType(), false,
                                 llvm::GlobalVariable::PrivateLinkage, SLoc);
    SLocPtr->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    CGM.getSanitizerMetadata()->disableSanitizerForGlobal(SLocPtr);
    assert(ReturnLocation.isValid() && "No valid return location");
    Builder.CreateStore(Builder.CreateBitCast(SLocPtr, Int8PtrTy),
                        ReturnLocation);
  }

  // Returning from an outlined SEH helper is UB, and we already warn on it.
  if (IsOutlinedSEHHelper) {
    Builder.CreateUnreachable();
    Builder.ClearInsertionPoint();
  }

  // Emit the result value, even if unused, to evaluate the side effects.
  const Expr *RV = S.getRetValue();

  // Record the result expression of the return statement. The recorded
  // expression is used to determine whether a block capture's lifetime should
  // end at the end of the full expression as opposed to the end of the scope
  // enclosing the block expression.
  //
  // This permits a small, easily-implemented exception to our over-conservative
  // rules about not jumping to statements following block literals with
  // non-trivial cleanups.
  SaveRetExprRAII SaveRetExpr(RV, *this);

  // Cleanups for the return expression run before the branch to the return
  // block (forced below via ForceCleanup).
  RunCleanupsScope cleanupScope(*this);
  // Strip an ExprWithCleanups wrapper; cleanupScope handles its cleanups.
  if (const auto *EWC = dyn_cast_or_null<ExprWithCleanups>(RV))
    RV = EWC->getSubExpr();

  // FIXME: Clean this up by using an LValue for ReturnTemp,
  // EmitStoreThroughLValue, and EmitAnyExpr.
  // Check if the NRVO candidate was not globalized in OpenMP mode.
  if (getLangOpts().ElideConstructors && S.getNRVOCandidate() &&
      S.getNRVOCandidate()->isNRVOVariable() &&
      (!getLangOpts().OpenMP ||
       !CGM.getOpenMPRuntime()
            .getAddressOfLocalVariable(*this, S.getNRVOCandidate())
            .isValid())) {
    // Apply the named return value optimization for this return statement,
    // which means doing nothing: the appropriate result has already been
    // constructed into the NRVO variable.

    // If there is an NRVO flag for this variable, set it to 1 to indicate
    // that the cleanup code should not destroy the variable.
    if (llvm::Value *NRVOFlag = NRVOFlags[S.getNRVOCandidate()])
      Builder.CreateFlagStore(Builder.getTrue(), NRVOFlag);
  } else if (!ReturnValue.isValid() || (RV && RV->getType()->isVoidType())) {
    // Make sure not to return anything, but evaluate the expression
    // for side effects.
    if (RV) {
      EmitAnyExpr(RV);
      // 'return f(...);' where caller and callee are both swiftasynccall may
      // be lowered to a musttail call followed by 'ret void'.
      if (auto *CE = dyn_cast<CallExpr>(RV))
        makeTailCallIfSwiftAsync(CE, Builder, CurFnInfo);
    }
  } else if (!RV) {
    // Do nothing (return value is left uninitialized)
  } else if (FnRetTy->isReferenceType()) {
    // If this function returns a reference, take the address of the expression
    // rather than the value.
    RValue Result = EmitReferenceBindingToExpr(RV);
    Builder.CreateStore(Result.getScalarVal(), ReturnValue);
  } else {
    // Store the operand into the return slot according to its evaluation kind.
    switch (getEvaluationKind(RV->getType())) {
    case TEK_Scalar:
      Builder.CreateStore(EmitScalarExpr(RV), ReturnValue);
      break;
    case TEK_Complex:
      EmitComplexExprIntoLValue(RV, MakeAddrLValue(ReturnValue, RV->getType()),
                                /*isInit*/ true);
      break;
    case TEK_Aggregate:
      EmitAggExpr(RV, AggValueSlot::forAddr(
                          ReturnValue, Qualifiers(),
                          AggValueSlot::IsDestructed,
                          AggValueSlot::DoesNotNeedGCBarriers,
                          AggValueSlot::IsNotAliased,
                          getOverlapForReturnValue()));
      break;
    }
  }

  // Statistics: count all returns, and those whose operand (if any) folds to
  // a constant.
  ++NumReturnExprs;
  if (!RV || RV->isEvaluatable(getContext()))
    ++NumSimpleReturnExprs;

  // Run the full-expression cleanups now, then branch to the return block
  // through any enclosing cleanup scopes.
  cleanupScope.ForceCleanup();
  EmitBranchThroughCleanup(ReturnBlock);
}
|
|
|
|
|
2007-06-09 09:20:56 +08:00
|
|
|
void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) {
|
2011-06-04 08:38:02 +08:00
|
|
|
// As long as debug info is modeled with instructions, we have to ensure we
|
|
|
|
// have a place to insert here and write the stop point here.
|
2012-04-10 13:04:07 +08:00
|
|
|
if (HaveInsertPoint())
|
2011-06-04 08:38:02 +08:00
|
|
|
EmitStopPoint(&S);
|
|
|
|
|
2014-03-15 01:01:24 +08:00
|
|
|
for (const auto *I : S.decls())
|
|
|
|
EmitDecl(*I);
|
2007-07-12 23:43:07 +08:00
|
|
|
}
|
2007-07-17 05:28:45 +08:00
|
|
|
|
2008-11-12 16:21:33 +08:00
|
|
|
void CodeGenFunction::EmitBreakStmt(const BreakStmt &S) {
|
2007-07-17 05:28:45 +08:00
|
|
|
assert(!BreakContinueStack.empty() && "break stmt not in a loop or switch!");
|
|
|
|
|
2008-11-12 16:21:33 +08:00
|
|
|
// If this code is reachable then emit a stop point (if generating
|
|
|
|
// debug info). We have to do this ourselves because we are on the
|
|
|
|
// "simple" statement path.
|
|
|
|
if (HaveInsertPoint())
|
|
|
|
EmitStopPoint(&S);
|
2009-02-08 17:22:19 +08:00
|
|
|
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
EmitBranchThroughCleanup(BreakContinueStack.back().BreakBlock);
|
2007-07-17 05:28:45 +08:00
|
|
|
}
|
|
|
|
|
2008-11-12 16:21:33 +08:00
|
|
|
void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) {
|
2007-07-17 05:28:45 +08:00
|
|
|
assert(!BreakContinueStack.empty() && "continue stmt not in a loop!");
|
|
|
|
|
2008-11-12 16:21:33 +08:00
|
|
|
// If this code is reachable then emit a stop point (if generating
|
|
|
|
// debug info). We have to do this ourselves because we are on the
|
|
|
|
// "simple" statement path.
|
|
|
|
if (HaveInsertPoint())
|
|
|
|
EmitStopPoint(&S);
|
2009-02-08 17:22:19 +08:00
|
|
|
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
EmitBranchThroughCleanup(BreakContinueStack.back().ContinueBlock);
|
2007-07-17 05:28:45 +08:00
|
|
|
}
|
2007-10-05 07:45:31 +08:00
|
|
|
|
2007-10-09 04:57:48 +08:00
|
|
|
/// EmitCaseStmtRange - If case statement range is not too big then
/// add multiple cases to switch instruction, one for each value within
/// the range. If range is too big then emit "if" condition check.
void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S,
                                        ArrayRef<const Attr *> Attrs) {
  assert(S.getRHS() && "Expected RHS value in CaseStmt");

  // Both range bounds are known-constant expressions.
  llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
  llvm::APSInt RHS = S.getRHS()->EvaluateKnownConstInt(getContext());

  // Emit the code for this case. We do this first to make sure it is
  // properly chained from our predecessor before generating the
  // switch machinery to enter this block.
  llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb");
  EmitBlockWithFallThrough(CaseDest, &S);
  EmitStmt(S.getSubStmt());

  // If range is empty, do nothing.
  if (LHS.isSigned() ? RHS.slt(LHS) : RHS.ult(LHS))
    return;

  // Branch likelihood derived from the case's attributes, if any.
  Stmt::Likelihood LH = Stmt::getLikelihood(Attrs);
  llvm::APInt Range = RHS - LHS;
  // FIXME: parameters such as this should not be hardcoded.
  if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
    // Range is small enough to add multiple switch instruction cases.
    uint64_t Total = getProfileCount(&S);
    unsigned NCases = Range.getZExtValue() + 1;
    // We only have one region counter for the entire set of cases here, so we
    // need to divide the weights evenly between the generated cases, ensuring
    // that the total weight is preserved. E.g., a weight of 5 over three cases
    // will be distributed as weights of 2, 2, and 1.
    uint64_t Weight = Total / NCases, Rem = Total % NCases;
    for (unsigned I = 0; I != NCases; ++I) {
      if (SwitchWeights)
        // The first Rem cases absorb one extra unit of the remainder each.
        SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
      else if (SwitchLikelihood)
        SwitchLikelihood->push_back(LH);

      if (Rem)
        Rem--;
      SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
      ++LHS;
    }
    return;
  }

  // The range is too big. Emit "if" condition into a new block,
  // making sure to save and restore the current insertion point.
  llvm::BasicBlock *RestoreBB = Builder.GetInsertBlock();

  // Push this test onto the chain of range checks (which terminates
  // in the default basic block). The switch's default will be changed
  // to the top of this chain after switch emission is complete.
  llvm::BasicBlock *FalseDest = CaseRangeBlock;
  CaseRangeBlock = createBasicBlock("sw.caserange");

  CurFn->getBasicBlockList().push_back(CaseRangeBlock);
  Builder.SetInsertPoint(CaseRangeBlock);

  // Emit range check: (cond - LHS) u<= (RHS - LHS) tests
  // LHS <= cond <= RHS with a single unsigned comparison.
  llvm::Value *Diff =
      Builder.CreateSub(SwitchInsn->getCondition(), Builder.getInt(LHS));
  llvm::Value *Cond =
      Builder.CreateICmpULE(Diff, Builder.getInt(Range), "inbounds");

  llvm::MDNode *Weights = nullptr;
  if (SwitchWeights) {
    uint64_t ThisCount = getProfileCount(&S);
    uint64_t DefaultCount = (*SwitchWeights)[0];
    Weights = createProfileWeights(ThisCount, DefaultCount);

    // Since we're chaining the switch default through each large case range, we
    // need to update the weight for the default, ie, the first case, to include
    // this case.
    (*SwitchWeights)[0] += ThisCount;
  } else if (SwitchLikelihood)
    // No profile data: lower the likelihood hint via the expect intrinsic.
    Cond = emitCondLikelihoodViaExpectIntrinsic(Cond, LH);

  Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);

  // Restore the appropriate insertion point.
  if (RestoreBB)
    Builder.SetInsertPoint(RestoreBB);
  else
    Builder.ClearInsertionPoint();
}
|
2007-10-06 04:54:07 +08:00
|
|
|
|
2020-10-18 19:34:41 +08:00
|
|
|
/// Emit a 'case' statement of an enclosing switch.  Adds the case value(s) to
/// the current SwitchInst, emits the basic block for the case body, and then
/// emits the body itself.  Consecutive trivially-chained cases
/// (case 1: case 2: case 3: body;) are handled iteratively rather than
/// recursively to avoid deep recursion and a cascade of fall-through blocks.
void CodeGenFunction::EmitCaseStmt(const CaseStmt &S,
                                   ArrayRef<const Attr *> Attrs) {
  // If there is no enclosing switch instance that we're aware of, then this
  // case statement and its block can be elided.  This situation only happens
  // when we've constant-folded the switch, are emitting the constant case,
  // and part of the constant case includes another case statement.  For
  // instance: switch (4) { case 4: do { case 5: } while (1); }
  if (!SwitchInsn) {
    EmitStmt(S.getSubStmt());
    return;
  }

  // Handle case ranges (GNU case 'lo ... hi') via the dedicated range path.
  if (S.getRHS()) {
    EmitCaseStmtRange(S, Attrs);
    return;
  }

  // Single-value case: fold the case expression to a constant now.
  llvm::ConstantInt *CaseVal =
    Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
  // Record this case's [[likely]]/[[unlikely]] attribute so the switch
  // emission can turn the per-case likelihoods into branch weights.
  if (SwitchLikelihood)
    SwitchLikelihood->push_back(Stmt::getLikelihood(Attrs));

  // If the body of the case is just a 'break', try to not emit an empty block.
  // If we're profiling or we're not optimizing, leave the block in for better
  // debug and coverage analysis.
  if (!CGM.getCodeGenOpts().hasProfileClangInstr() &&
      CGM.getCodeGenOpts().OptimizationLevel > 0 &&
      isa<BreakStmt>(S.getSubStmt())) {
    JumpDest Block = BreakContinueStack.back().BreakBlock;

    // Only do this optimization if there are no cleanups that need emitting.
    if (isObviouslyBranchWithoutCleanups(Block)) {
      if (SwitchWeights)
        SwitchWeights->push_back(getProfileCount(&S));
      // Point the case directly at the break destination, skipping the
      // would-be-empty case block entirely.
      SwitchInsn->addCase(CaseVal, Block.getBlock());

      // If there was a fallthrough into this case, make sure to redirect it to
      // the end of the switch as well.
      if (Builder.GetInsertBlock()) {
        Builder.CreateBr(Block.getBlock());
        Builder.ClearInsertionPoint();
      }
      return;
    }
  }

  // General path: create a fresh block for the case body and register it.
  llvm::BasicBlock *CaseDest = createBasicBlock("sw.bb");
  EmitBlockWithFallThrough(CaseDest, &S);
  if (SwitchWeights)
    SwitchWeights->push_back(getProfileCount(&S));
  SwitchInsn->addCase(CaseVal, CaseDest);

  // Recursively emitting the statement is acceptable, but is not wonderful for
  // code where we have many case statements nested together, i.e.:
  //  case 1:
  //    case 2:
  //      case 3: etc.
  // Handling this recursively will create a new block for each case statement
  // that falls through to the next case which is IR intensive.  It also causes
  // deep recursion which can run into stack depth limitations.  Handle
  // sequential non-range case statements specially.
  //
  // TODO When the next case has a likelihood attribute the code returns to the
  // recursive algorithm. Maybe improve this case if it becomes common practice
  // to use a lot of attributes.
  const CaseStmt *CurCase = &S;
  const CaseStmt *NextCase = dyn_cast<CaseStmt>(S.getSubStmt());

  // Otherwise, iteratively add consecutive cases to this switch stmt.
  while (NextCase && NextCase->getRHS() == nullptr) {
    CurCase = NextCase;
    llvm::ConstantInt *CaseVal =
      Builder.getInt(CurCase->getLHS()->EvaluateKnownConstInt(getContext()));

    if (SwitchWeights)
      SwitchWeights->push_back(getProfileCount(NextCase));
    // Under -fprofile-instr-generate each chained case needs its own block so
    // its counter increment is distinct; otherwise all cases share CaseDest.
    if (CGM.getCodeGenOpts().hasProfileClangInstr()) {
      CaseDest = createBasicBlock("sw.bb");
      EmitBlockWithFallThrough(CaseDest, CurCase);
    }
    // Since this loop is only executed when the CaseStmt has no attributes
    // use a hard-coded value.
    if (SwitchLikelihood)
      SwitchLikelihood->push_back(Stmt::LH_None);

    SwitchInsn->addCase(CaseVal, CaseDest);
    NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
  }

  // Generate a stop point for debug info if the case statement is
  // followed by a default statement. A fallthrough case before a
  // default case gets its own branch target.
  if (CurCase->getSubStmt()->getStmtClass() == Stmt::DefaultStmtClass)
    EmitStopPoint(CurCase);

  // Normal default recursion for non-cases.
  EmitStmt(CurCase->getSubStmt());
}
|
|
|
|
|
2020-10-18 19:34:41 +08:00
|
|
|
void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S,
|
|
|
|
ArrayRef<const Attr *> Attrs) {
|
2016-07-22 06:31:40 +08:00
|
|
|
// If there is no enclosing switch instance that we're aware of, then this
|
|
|
|
// default statement can be elided. This situation only happens when we've
|
|
|
|
// constant-folded the switch.
|
|
|
|
if (!SwitchInsn) {
|
|
|
|
EmitStmt(S.getSubStmt());
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2008-07-25 09:11:38 +08:00
|
|
|
llvm::BasicBlock *DefaultBlock = SwitchInsn->getDefaultDest();
|
2009-09-09 23:08:12 +08:00
|
|
|
assert(DefaultBlock->empty() &&
|
2008-11-11 10:29:29 +08:00
|
|
|
"EmitDefaultStmt: Default block already defined?");
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2020-10-18 19:34:41 +08:00
|
|
|
if (SwitchLikelihood)
|
|
|
|
SwitchLikelihood->front() = Stmt::getLikelihood(Attrs);
|
|
|
|
|
2015-04-24 07:06:47 +08:00
|
|
|
EmitBlockWithFallThrough(DefaultBlock, &S);
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2007-10-05 07:45:31 +08:00
|
|
|
EmitStmt(S.getSubStmt());
|
|
|
|
}
|
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
/// CollectStatementsForCase - Given the body of a 'switch' statement and a
|
|
|
|
/// constant value that is being switched on, see if we can dead code eliminate
|
|
|
|
/// the body of the switch to a simple series of statements to emit. Basically,
|
|
|
|
/// on a switch (5) we want to find these statements:
|
|
|
|
/// case 5:
|
|
|
|
/// printf(...); <--
|
|
|
|
/// ++i; <--
|
|
|
|
/// break;
|
|
|
|
///
|
|
|
|
/// and add them to the ResultStmts vector. If it is unsafe to do this
|
|
|
|
/// transformation (for example, one of the elided statements contains a label
|
|
|
|
/// that might be jumped to), return CSFC_Failure. If we handled it and 'S'
|
|
|
|
/// should include statements after it (e.g. the printf() line is a substmt of
|
|
|
|
/// the case) then return CSFC_FallThrough. If we handled it and found a break
|
|
|
|
/// statement, then return CSFC_Success.
|
|
|
|
///
|
|
|
|
/// If Case is non-null, then we are looking for the specified case, checking
|
|
|
|
/// that nothing we jump over contains labels. If Case is null, then we found
|
|
|
|
/// the case and are looking for the break.
|
|
|
|
///
|
|
|
|
/// If the recursive walk actually finds our Case, then we set FoundCase to
|
|
|
|
/// true.
|
|
|
|
///
|
|
|
|
// Result of CollectStatementsForCase: CSFC_Failure = the dead-code
// elimination is unsafe (e.g. a label could be jumped to); CSFC_FallThrough =
// statement handled, keep collecting following statements; CSFC_Success =
// handled and a terminating 'break' was found (or the statement is skippable).
enum CSFC_Result { CSFC_Failure, CSFC_FallThrough, CSFC_Success };
|
|
|
|
static CSFC_Result CollectStatementsForCase(const Stmt *S,
|
|
|
|
const SwitchCase *Case,
|
|
|
|
bool &FoundCase,
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVectorImpl<const Stmt*> &ResultStmts) {
|
2011-02-28 09:02:29 +08:00
|
|
|
// If this is a null statement, just succeed.
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!S)
|
2011-02-28 09:02:29 +08:00
|
|
|
return Case ? CSFC_Success : CSFC_FallThrough;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// If this is the switchcase (case 4: or default) that we're looking for, then
|
|
|
|
// we're in business. Just add the substatement.
|
|
|
|
if (const SwitchCase *SC = dyn_cast<SwitchCase>(S)) {
|
|
|
|
if (S == Case) {
|
|
|
|
FoundCase = true;
|
2014-05-21 13:09:00 +08:00
|
|
|
return CollectStatementsForCase(SC->getSubStmt(), nullptr, FoundCase,
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
ResultStmts);
|
|
|
|
}
|
2012-06-21 01:43:05 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// Otherwise, this is some other case or default statement, just ignore it.
|
|
|
|
return CollectStatementsForCase(SC->getSubStmt(), Case, FoundCase,
|
|
|
|
ResultStmts);
|
|
|
|
}
|
2011-02-28 09:02:29 +08:00
|
|
|
|
|
|
|
// If we are in the live part of the code and we found our break statement,
|
|
|
|
// return a success!
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!Case && isa<BreakStmt>(S))
|
2011-02-28 09:02:29 +08:00
|
|
|
return CSFC_Success;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2011-02-28 09:02:29 +08:00
|
|
|
// If this is a switch statement, then it might contain the SwitchCase, the
|
|
|
|
// break, or neither.
|
|
|
|
if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(S)) {
|
|
|
|
// Handle this as two cases: we might be looking for the SwitchCase (if so
|
|
|
|
// the skipped statements must be skippable) or we might already have it.
|
|
|
|
CompoundStmt::const_body_iterator I = CS->body_begin(), E = CS->body_end();
|
2016-09-17 07:30:39 +08:00
|
|
|
bool StartedInLiveCode = FoundCase;
|
|
|
|
unsigned StartSize = ResultStmts.size();
|
|
|
|
|
|
|
|
// If we've not found the case yet, scan through looking for it.
|
2011-02-28 09:02:29 +08:00
|
|
|
if (Case) {
|
2011-02-28 15:22:44 +08:00
|
|
|
// Keep track of whether we see a skipped declaration. The code could be
|
|
|
|
// using the declaration even if it is skipped, so we can't optimize out
|
|
|
|
// the decl if the kept statements might refer to it.
|
|
|
|
bool HadSkippedDecl = false;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2011-02-28 09:02:29 +08:00
|
|
|
// If we're looking for the case, just see if we can skip each of the
|
|
|
|
// substatements.
|
|
|
|
for (; Case && I != E; ++I) {
|
2016-09-17 07:30:39 +08:00
|
|
|
HadSkippedDecl |= CodeGenFunction::mightAddDeclToScope(*I);
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2011-02-28 09:02:29 +08:00
|
|
|
switch (CollectStatementsForCase(*I, Case, FoundCase, ResultStmts)) {
|
|
|
|
case CSFC_Failure: return CSFC_Failure;
|
|
|
|
case CSFC_Success:
|
|
|
|
// A successful result means that either 1) that the statement doesn't
|
|
|
|
// have the case and is skippable, or 2) does contain the case value
|
2011-02-28 15:16:14 +08:00
|
|
|
// and also contains the break to exit the switch. In the later case,
|
|
|
|
// we just verify the rest of the statements are elidable.
|
|
|
|
if (FoundCase) {
|
2011-02-28 15:22:44 +08:00
|
|
|
// If we found the case and skipped declarations, we can't do the
|
|
|
|
// optimization.
|
|
|
|
if (HadSkippedDecl)
|
|
|
|
return CSFC_Failure;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2011-02-28 15:16:14 +08:00
|
|
|
for (++I; I != E; ++I)
|
|
|
|
if (CodeGenFunction::ContainsLabel(*I, true))
|
|
|
|
return CSFC_Failure;
|
|
|
|
return CSFC_Success;
|
|
|
|
}
|
2011-02-28 09:02:29 +08:00
|
|
|
break;
|
|
|
|
case CSFC_FallThrough:
|
|
|
|
// If we have a fallthrough condition, then we must have found the
|
|
|
|
// case started to include statements. Consider the rest of the
|
|
|
|
// statements in the compound statement as candidates for inclusion.
|
|
|
|
assert(FoundCase && "Didn't find case but returned fallthrough?");
|
|
|
|
// We recursively found Case, so we're not looking for it anymore.
|
2014-05-21 13:09:00 +08:00
|
|
|
Case = nullptr;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2011-02-28 15:22:44 +08:00
|
|
|
// If we found the case and skipped declarations, we can't do the
|
|
|
|
// optimization.
|
|
|
|
if (HadSkippedDecl)
|
|
|
|
return CSFC_Failure;
|
2011-02-28 09:02:29 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2016-09-17 07:30:39 +08:00
|
|
|
|
|
|
|
if (!FoundCase)
|
|
|
|
return CSFC_Success;
|
|
|
|
|
|
|
|
assert(!HadSkippedDecl && "fallthrough after skipping decl");
|
2011-02-28 09:02:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// If we have statements in our range, then we know that the statements are
|
|
|
|
// live and need to be added to the set of statements we're tracking.
|
2016-09-17 07:30:39 +08:00
|
|
|
bool AnyDecls = false;
|
2011-02-28 09:02:29 +08:00
|
|
|
for (; I != E; ++I) {
|
2016-09-17 07:30:39 +08:00
|
|
|
AnyDecls |= CodeGenFunction::mightAddDeclToScope(*I);
|
|
|
|
|
2014-05-21 13:09:00 +08:00
|
|
|
switch (CollectStatementsForCase(*I, nullptr, FoundCase, ResultStmts)) {
|
2011-02-28 09:02:29 +08:00
|
|
|
case CSFC_Failure: return CSFC_Failure;
|
|
|
|
case CSFC_FallThrough:
|
|
|
|
// A fallthrough result means that the statement was simple and just
|
|
|
|
// included in ResultStmt, keep adding them afterwards.
|
|
|
|
break;
|
|
|
|
case CSFC_Success:
|
|
|
|
// A successful result means that we found the break statement and
|
|
|
|
// stopped statement inclusion. We just ensure that any leftover stmts
|
|
|
|
// are skippable and return success ourselves.
|
|
|
|
for (++I; I != E; ++I)
|
|
|
|
if (CodeGenFunction::ContainsLabel(*I, true))
|
|
|
|
return CSFC_Failure;
|
|
|
|
return CSFC_Success;
|
2012-06-21 01:43:05 +08:00
|
|
|
}
|
2011-02-28 09:02:29 +08:00
|
|
|
}
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2016-09-17 07:30:39 +08:00
|
|
|
// If we're about to fall out of a scope without hitting a 'break;', we
|
|
|
|
// can't perform the optimization if there were any decls in that scope
|
|
|
|
// (we'd lose their end-of-lifetime).
|
|
|
|
if (AnyDecls) {
|
|
|
|
// If the entire compound statement was live, there's one more thing we
|
|
|
|
// can try before giving up: emit the whole thing as a single statement.
|
|
|
|
// We can do that unless the statement contains a 'break;'.
|
|
|
|
// FIXME: Such a break must be at the end of a construct within this one.
|
|
|
|
// We could emit this by just ignoring the BreakStmts entirely.
|
|
|
|
if (StartedInLiveCode && !CodeGenFunction::containsBreak(S)) {
|
|
|
|
ResultStmts.resize(StartSize);
|
|
|
|
ResultStmts.push_back(S);
|
|
|
|
} else {
|
|
|
|
return CSFC_Failure;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return CSFC_FallThrough;
|
2011-02-28 09:02:29 +08:00
|
|
|
}
|
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// Okay, this is some other statement that we don't handle explicitly, like a
|
|
|
|
// for statement or increment etc. If we are skipping over this statement,
|
|
|
|
// just verify it doesn't have labels, which would make it invalid to elide.
|
|
|
|
if (Case) {
|
2011-02-28 15:22:44 +08:00
|
|
|
if (CodeGenFunction::ContainsLabel(S, true))
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
return CSFC_Failure;
|
|
|
|
return CSFC_Success;
|
|
|
|
}
|
2012-06-21 01:43:05 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// Otherwise, we want to include this statement. Everything is cool with that
|
|
|
|
// so long as it doesn't contain a break out of the switch we're in.
|
|
|
|
if (CodeGenFunction::containsBreak(S)) return CSFC_Failure;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// Otherwise, everything is great. Include the statement and tell the caller
|
|
|
|
// that we fall through and include the next statement as well.
|
|
|
|
ResultStmts.push_back(S);
|
|
|
|
return CSFC_FallThrough;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// FindCaseStatementsForValue - Find the case statement being jumped to and
|
|
|
|
/// then invoke CollectStatementsForCase to find the list of statements to emit
|
|
|
|
/// for a switch on constant. See the comment above CollectStatementsForCase
|
|
|
|
/// for more details.
|
|
|
|
static bool FindCaseStatementsForValue(const SwitchStmt &S,
|
2012-07-24 04:21:35 +08:00
|
|
|
const llvm::APSInt &ConstantCondValue,
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVectorImpl<const Stmt*> &ResultStmts,
|
2014-01-07 06:27:43 +08:00
|
|
|
ASTContext &C,
|
|
|
|
const SwitchCase *&ResultCase) {
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// First step, find the switch case that is being branched to. We can do this
|
|
|
|
// efficiently by scanning the SwitchCase list.
|
|
|
|
const SwitchCase *Case = S.getSwitchCaseList();
|
2014-05-21 13:09:00 +08:00
|
|
|
const DefaultStmt *DefaultCase = nullptr;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
for (; Case; Case = Case->getNextSwitchCase()) {
|
|
|
|
// It's either a default or case. Just remember the default statement in
|
|
|
|
// case we're not jumping to any numbered cases.
|
|
|
|
if (const DefaultStmt *DS = dyn_cast<DefaultStmt>(Case)) {
|
|
|
|
DefaultCase = DS;
|
|
|
|
continue;
|
|
|
|
}
|
2012-06-21 01:43:05 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// Check to see if this case is the one we're looking for.
|
|
|
|
const CaseStmt *CS = cast<CaseStmt>(Case);
|
|
|
|
// Don't handle case ranges yet.
|
|
|
|
if (CS->getRHS()) return false;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// If we found our case, remember it as 'case'.
|
2011-10-11 02:28:20 +08:00
|
|
|
if (CS->getLHS()->EvaluateKnownConstInt(C) == ConstantCondValue)
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
break;
|
|
|
|
}
|
2012-06-21 01:43:05 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// If we didn't find a matching case, we use a default if it exists, or we
|
|
|
|
// elide the whole switch body!
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!Case) {
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// It is safe to elide the body of the switch if it doesn't contain labels
|
|
|
|
// etc. If it is safe, return successfully with an empty ResultStmts list.
|
2014-05-21 13:09:00 +08:00
|
|
|
if (!DefaultCase)
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
return !CodeGenFunction::ContainsLabel(&S);
|
|
|
|
Case = DefaultCase;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Ok, we know which case is being jumped to, try to collect all the
|
|
|
|
// statements that follow it. This can fail for a variety of reasons. Also,
|
|
|
|
// check to see that the recursive walk actually found our case statement.
|
|
|
|
// Insane cases like this can fail to find it in the recursive walk since we
|
|
|
|
// don't handle every stmt kind:
|
|
|
|
// switch (4) {
|
|
|
|
// while (1) {
|
|
|
|
// case 4: ...
|
|
|
|
bool FoundCase = false;
|
2014-01-07 06:27:43 +08:00
|
|
|
ResultCase = Case;
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
return CollectStatementsForCase(S.getBody(), Case, FoundCase,
|
|
|
|
ResultStmts) != CSFC_Failure &&
|
|
|
|
FoundCase;
|
|
|
|
}
|
|
|
|
|
2020-10-18 19:34:41 +08:00
|
|
|
// Compute branch weights for a switch from the [[likely]]/[[unlikely]]
// annotations on its cases.  Returns None when there is nothing to weight:
// fewer than two branches, or no likelihood attribute used at all.
static Optional<SmallVector<uint64_t, 16>>
getLikelihoodWeights(ArrayRef<Stmt::Likelihood> Likelihoods) {
  // With at most one branch there is nothing to bias.
  if (Likelihoods.size() <= 1)
    return None;

  // Tally how many branches fall into each likelihood class.
  uint64_t NumUnlikely = 0;
  uint64_t NumNone = 0;
  uint64_t NumLikely = 0;
  for (const Stmt::Likelihood LH : Likelihoods) {
    if (LH == Stmt::LH_Unlikely)
      ++NumUnlikely;
    else if (LH == Stmt::LH_None)
      ++NumNone;
    else if (LH == Stmt::LH_Likely)
      ++NumLikely;
  }

  // If no branch carries an attribute, emit no weights at all.
  if (NumUnlikely == 0 && NumLikely == 0)
    return None;

  // When multiple cases share the same code they can be combined during
  // optimization, and the weights of the combined branch become the sum of
  // the individual weights.  Scale so the combined sum of all neutral cases
  // never exceeds the value of a single likely attribute.  The "+2"/"+1"
  // additions both avoid division by zero and keep the total None weight
  // below the Likely weight.
  const uint64_t LikelyWeight = INT32_MAX / (NumLikely + 2);
  const uint64_t NoneWeight = LikelyWeight / (NumNone + 1);
  const uint64_t UnlikelyWeight = 0;

  // Map every branch's likelihood to its weight, preserving order.
  SmallVector<uint64_t, 16> Weights;
  Weights.reserve(Likelihoods.size());
  for (const Stmt::Likelihood LH : Likelihoods) {
    if (LH == Stmt::LH_Unlikely)
      Weights.push_back(UnlikelyWeight);
    else if (LH == Stmt::LH_None)
      Weights.push_back(NoneWeight);
    else if (LH == Stmt::LH_Likely)
      Weights.push_back(LikelyWeight);
  }

  return Weights;
}
|
|
|
|
|
2007-10-05 07:45:31 +08:00
|
|
|
void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
|
2012-01-18 07:39:50 +08:00
|
|
|
// Handle nested switch statements.
|
|
|
|
llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
|
2014-01-07 06:27:43 +08:00
|
|
|
SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
|
2020-10-18 19:34:41 +08:00
|
|
|
SmallVector<Stmt::Likelihood, 16> *SavedSwitchLikelihood = SwitchLikelihood;
|
2012-01-18 07:39:50 +08:00
|
|
|
llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
|
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// See if we can constant fold the condition of the switch and therefore only
|
|
|
|
// emit the live case statement (if any) of the switch.
|
2012-07-24 04:21:35 +08:00
|
|
|
llvm::APSInt ConstantCondValue;
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
if (ConstantFoldsToSimpleInteger(S.getCond(), ConstantCondValue)) {
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<const Stmt*, 4> CaseStmts;
|
2014-05-21 13:09:00 +08:00
|
|
|
const SwitchCase *Case = nullptr;
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
if (FindCaseStatementsForValue(S, ConstantCondValue, CaseStmts,
|
2014-01-07 06:27:43 +08:00
|
|
|
getContext(), Case)) {
|
2015-04-24 07:06:47 +08:00
|
|
|
if (Case)
|
|
|
|
incrementProfileCounter(Case);
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
RunCleanupsScope ExecutedScope(*this);
|
|
|
|
|
2016-07-14 08:11:03 +08:00
|
|
|
if (S.getInit())
|
|
|
|
EmitStmt(S.getInit());
|
|
|
|
|
2014-05-03 08:14:49 +08:00
|
|
|
// Emit the condition variable if needed inside the entire cleanup scope
|
|
|
|
// used by this special case for constant folded switches.
|
|
|
|
if (S.getConditionVariable())
|
2018-03-18 05:01:27 +08:00
|
|
|
EmitDecl(*S.getConditionVariable());
|
2014-05-03 08:14:49 +08:00
|
|
|
|
2012-01-18 07:39:50 +08:00
|
|
|
// At this point, we are no longer "within" a switch instance, so
|
|
|
|
// we can temporarily enforce this to ensure that any embedded case
|
|
|
|
// statements are not emitted.
|
2014-05-21 13:09:00 +08:00
|
|
|
SwitchInsn = nullptr;
|
2012-01-18 07:39:50 +08:00
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
// Okay, we can dead code eliminate everything except this case. Emit the
|
|
|
|
// specified series of statements and we're good.
|
|
|
|
for (unsigned i = 0, e = CaseStmts.size(); i != e; ++i)
|
|
|
|
EmitStmt(CaseStmts[i]);
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(&S);
|
2012-01-18 07:39:50 +08:00
|
|
|
|
2012-04-10 13:04:04 +08:00
|
|
|
// Now we want to restore the saved switch instance so that nested
|
|
|
|
// switches continue to function properly
|
2012-01-18 07:39:50 +08:00
|
|
|
SwitchInsn = SavedSwitchInsn;
|
|
|
|
|
First tiny step to implementing PR9322: build infrastructure for only emitting the
live case of a switch statement when switching on a constant. This is terribly
limited, but enough to handle the trivial example included. Before we would
emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
switch i32 1, label %sw.epilog [
i32 1, label %sw.bb
]
sw.bb: ; preds = %entry
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
br label %sw.epilog
sw.epilog: ; preds = %sw.bb, %entry
switch i32 0, label %sw.epilog3 [
i32 1, label %sw.bb1
]
sw.bb1: ; preds = %sw.epilog
%tmp2 = load i32* %i.addr, align 4
%add = add nsw i32 %tmp2, 2
store i32 %add, i32* %i.addr, align 4
br label %sw.epilog3
sw.epilog3: ; preds = %sw.bb1, %sw.epilog
ret void
}
now we emit:
define void @test1(i32 %i) nounwind {
entry:
%i.addr = alloca i32, align 4
store i32 %i, i32* %i.addr, align 4
%tmp = load i32* %i.addr, align 4
%inc = add nsw i32 %tmp, 1
store i32 %inc, i32* %i.addr, align 4
ret void
}
This improves -O0 compile time (less IR to generate and shove through the code
generator) and the clever linux kernel people found a way to fail to build if we
don't do this optimization. This step isn't enough to handle the kernel case
though.
llvm-svn: 126597
2011-02-28 08:22:07 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2014-05-03 08:14:49 +08:00
|
|
|
JumpDest SwitchExit = getJumpDestInCurrentScope("sw.epilog");
|
|
|
|
|
|
|
|
RunCleanupsScope ConditionScope(*this);
|
2016-07-14 08:11:03 +08:00
|
|
|
|
|
|
|
if (S.getInit())
|
|
|
|
EmitStmt(S.getInit());
|
|
|
|
|
2014-05-03 08:14:49 +08:00
|
|
|
if (S.getConditionVariable())
|
2018-03-18 05:01:27 +08:00
|
|
|
EmitDecl(*S.getConditionVariable());
|
2007-10-05 07:45:31 +08:00
|
|
|
llvm::Value *CondV = EmitScalarExpr(S.getCond());
|
|
|
|
|
2008-07-25 09:11:38 +08:00
|
|
|
// Create basic block to hold stuff that comes after switch
|
|
|
|
// statement. We also need to create a default block now so that
|
|
|
|
// explicit case ranges tests can have a place to jump to on
|
|
|
|
// failure.
|
2008-11-11 10:29:29 +08:00
|
|
|
llvm::BasicBlock *DefaultBlock = createBasicBlock("sw.default");
|
2008-07-25 09:11:38 +08:00
|
|
|
SwitchInsn = Builder.CreateSwitch(CondV, DefaultBlock);
|
2014-01-07 06:27:43 +08:00
|
|
|
if (PGO.haveRegionCounts()) {
|
|
|
|
// Walk the SwitchCase list to find how many there are.
|
|
|
|
uint64_t DefaultCount = 0;
|
|
|
|
unsigned NumCases = 0;
|
|
|
|
for (const SwitchCase *Case = S.getSwitchCaseList();
|
|
|
|
Case;
|
|
|
|
Case = Case->getNextSwitchCase()) {
|
|
|
|
if (isa<DefaultStmt>(Case))
|
2015-04-24 07:06:47 +08:00
|
|
|
DefaultCount = getProfileCount(Case);
|
2014-01-07 06:27:43 +08:00
|
|
|
NumCases += 1;
|
|
|
|
}
|
|
|
|
SwitchWeights = new SmallVector<uint64_t, 16>();
|
|
|
|
SwitchWeights->reserve(NumCases);
|
|
|
|
// The default needs to be first. We store the edge count, so we already
|
|
|
|
// know the right weight.
|
|
|
|
SwitchWeights->push_back(DefaultCount);
|
2020-10-18 19:34:41 +08:00
|
|
|
} else if (CGM.getCodeGenOpts().OptimizationLevel) {
|
|
|
|
SwitchLikelihood = new SmallVector<Stmt::Likelihood, 16>();
|
|
|
|
// Initialize the default case.
|
|
|
|
SwitchLikelihood->push_back(Stmt::LH_None);
|
2014-01-07 06:27:43 +08:00
|
|
|
}
|
2020-10-18 19:34:41 +08:00
|
|
|
|
2008-07-25 09:11:38 +08:00
|
|
|
CaseRangeBlock = DefaultBlock;
|
2007-10-05 07:45:31 +08:00
|
|
|
|
2008-11-12 16:21:33 +08:00
|
|
|
// Clear the insertion point to indicate we are in unreachable code.
|
|
|
|
Builder.ClearInsertionPoint();
|
2008-05-13 00:08:04 +08:00
|
|
|
|
2013-12-05 12:47:09 +08:00
|
|
|
// All break statements jump to NextBlock. If BreakContinueStack is non-empty
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
// then reuse last ContinueBlock.
|
2010-07-06 09:34:17 +08:00
|
|
|
JumpDest OuterContinue;
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
if (!BreakContinueStack.empty())
|
|
|
|
OuterContinue = BreakContinueStack.back().ContinueBlock;
|
2009-02-10 13:52:02 +08:00
|
|
|
|
Change PGO instrumentation to compute counts in a separate AST traversal.
Previously, we made one traversal of the AST prior to codegen to assign
counters to the ASTs and then propagated the count values during codegen. This
patch now adds a separate AST traversal prior to codegen for the
-fprofile-instr-use option to propagate the count values. The counts are then
saved in a map from which they can be retrieved during codegen.
This new approach has several advantages:
1. It gets rid of a lot of extra PGO-related code that had previously been
added to codegen.
2. It fixes a serious bug. My original implementation (which was mailed to the
list but never committed) used 3 counters for every loop. Justin improved it to
move 2 of those counters into the less-frequently executed breaks and continues,
but that turned out to produce wrong count values in some cases. The solution
requires visiting a loop body before the condition so that the count for the
condition properly includes the break and continue counts. Changing codegen to
visit a loop body first would be a fairly invasive change, but with a separate
AST traversal, it is easy to control the order of traversal. I've added a
testcase (provided by Justin) to make sure this works correctly.
3. It improves the instrumentation overhead, reducing the number of counters for
a loop from 3 to 1. We no longer need dedicated counters for breaks and
continues, since we can just use the propagated count values when visiting
breaks and continues.
To make this work, I needed to make a change to the way we count case
statements, going back to my original approach of not including the fall-through
in the counter values. This was necessary because there isn't always an AST node
that can be used to record the fall-through count. Now case statements are
handled the same as default statements, with the fall-through paths branching
over the counter increments. While I was at it, I also went back to using this
approach for do-loops -- omitting the fall-through count into the loop body
simplifies some of the calculations and make them behave the same as other
loops. Whenever we start using this instrumentation for coverage, we'll need
to add the fall-through counts into the counter values.
llvm-svn: 201528
2014-02-18 03:21:09 +08:00
|
|
|
BreakContinueStack.push_back(BreakContinue(SwitchExit, OuterContinue));
|
2007-10-05 07:45:31 +08:00
|
|
|
|
|
|
|
// Emit switch body.
|
|
|
|
EmitStmt(S.getBody());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-02-10 13:52:02 +08:00
|
|
|
BreakContinueStack.pop_back();
|
2007-10-05 07:45:31 +08:00
|
|
|
|
2008-07-25 09:11:38 +08:00
|
|
|
// Update the default block in case explicit case range tests have
|
|
|
|
// been chained on top.
|
Compatability fix for SwitchInst refactoring.
The purpose of refactoring is to hide operand roles from SwitchInst user (programmer). If you want to play with operands directly, probably you will need lower level methods than SwitchInst ones (TerminatorInst or may be User). After this patch we can reorganize SwitchInst operands and successors as we want.
What was done:
1. Changed semantics of index inside the getCaseValue method:
getCaseValue(0) means "get first case", not a condition. Use getCondition() if you want to resolve the condition. I propose don't mix SwitchInst case indexing with low level indexing (TI successors indexing, User's operands indexing), since it may be dangerous.
2. By the same reason findCaseValue(ConstantInt*) returns actual number of case value. 0 means first case, not default. If there is no case with given value, ErrorIndex will returned.
3. Added getCaseSuccessor method. I propose to avoid usage of TerminatorInst::getSuccessor if you want to resolve case successor BB. Use getCaseSuccessor instead, since internal SwitchInst organization of operands/successors is hidden and may be changed in any moment.
4. Added resolveSuccessorIndex and resolveCaseIndex. The main purpose of these methods is to see how case successors are really mapped in TerminatorInst.
4.1 "resolveSuccessorIndex" was created if you need to level down from SwitchInst to TerminatorInst. It returns TerminatorInst's successor index for given case successor.
4.2 "resolveCaseIndex" converts low level successors index to case index that curresponds to the given successor.
Note: There are also related compatability fix patches for dragonegg, klee, llvm-gcc-4.0, llvm-gcc-4.2, safecode, clang.
llvm-svn: 149482
2012-02-01 15:50:21 +08:00
|
|
|
SwitchInsn->setDefaultDest(CaseRangeBlock);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-07-06 09:34:17 +08:00
|
|
|
// If a default was never emitted:
|
2008-07-25 09:11:38 +08:00
|
|
|
if (!DefaultBlock->getParent()) {
|
2010-07-06 09:34:17 +08:00
|
|
|
// If we have cleanups, emit the default block so that there's a
|
|
|
|
// place to jump through the cleanups from.
|
|
|
|
if (ConditionScope.requiresCleanups()) {
|
|
|
|
EmitBlock(DefaultBlock);
|
|
|
|
|
|
|
|
// Otherwise, just forward the default block to the switch end.
|
|
|
|
} else {
|
2010-07-24 05:56:41 +08:00
|
|
|
DefaultBlock->replaceAllUsesWith(SwitchExit.getBlock());
|
2010-07-06 09:34:17 +08:00
|
|
|
delete DefaultBlock;
|
|
|
|
}
|
2008-07-25 09:11:38 +08:00
|
|
|
}
|
|
|
|
|
2010-07-24 05:56:41 +08:00
|
|
|
ConditionScope.ForceCleanup();
|
|
|
|
|
2008-07-25 09:11:38 +08:00
|
|
|
// Emit continuation.
|
2010-07-24 05:56:41 +08:00
|
|
|
EmitBlock(SwitchExit.getBlock(), true);
|
2015-04-24 07:06:47 +08:00
|
|
|
incrementProfileCounter(&S);
|
2014-01-07 06:27:43 +08:00
|
|
|
|
2015-09-10 06:39:06 +08:00
|
|
|
// If the switch has a condition wrapped by __builtin_unpredictable,
|
|
|
|
// create metadata that specifies that the switch is unpredictable.
|
|
|
|
// Don't bother if not optimizing because that metadata would not be used.
|
2016-04-20 01:13:14 +08:00
|
|
|
auto *Call = dyn_cast<CallExpr>(S.getCond());
|
|
|
|
if (Call && CGM.getCodeGenOpts().OptimizationLevel != 0) {
|
|
|
|
auto *FD = dyn_cast_or_null<FunctionDecl>(Call->getCalleeDecl());
|
|
|
|
if (FD && FD->getBuiltinID() == Builtin::BI__builtin_unpredictable) {
|
|
|
|
llvm::MDBuilder MDHelper(getLLVMContext());
|
|
|
|
SwitchInsn->setMetadata(llvm::LLVMContext::MD_unpredictable,
|
|
|
|
MDHelper.createUnpredictable());
|
2015-09-10 06:39:06 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-01-07 06:27:43 +08:00
|
|
|
if (SwitchWeights) {
|
|
|
|
assert(SwitchWeights->size() == 1 + SwitchInsn->getNumCases() &&
|
|
|
|
"switch weights do not match switch cases");
|
|
|
|
// If there's only one jump destination there's no sense weighting it.
|
|
|
|
if (SwitchWeights->size() > 1)
|
|
|
|
SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
|
2015-05-02 13:00:55 +08:00
|
|
|
createProfileWeights(*SwitchWeights));
|
2014-01-07 06:27:43 +08:00
|
|
|
delete SwitchWeights;
|
2020-10-18 19:34:41 +08:00
|
|
|
} else if (SwitchLikelihood) {
|
|
|
|
assert(SwitchLikelihood->size() == 1 + SwitchInsn->getNumCases() &&
|
|
|
|
"switch likelihoods do not match switch cases");
|
|
|
|
Optional<SmallVector<uint64_t, 16>> LHW =
|
|
|
|
getLikelihoodWeights(*SwitchLikelihood);
|
|
|
|
if (LHW) {
|
|
|
|
llvm::MDBuilder MDHelper(CGM.getLLVMContext());
|
|
|
|
SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
|
|
|
|
createProfileWeights(*LHW));
|
|
|
|
}
|
|
|
|
delete SwitchLikelihood;
|
2014-01-07 06:27:43 +08:00
|
|
|
}
|
2007-10-05 07:45:31 +08:00
|
|
|
SwitchInsn = SavedSwitchInsn;
|
2014-01-07 06:27:43 +08:00
|
|
|
SwitchWeights = SavedSwitchWeights;
|
2020-10-18 19:34:41 +08:00
|
|
|
SwitchLikelihood = SavedSwitchLikelihood;
|
2007-10-09 04:57:48 +08:00
|
|
|
CaseRangeBlock = SavedCRBlock;
|
2007-10-05 07:45:31 +08:00
|
|
|
}
|
2008-02-06 00:35:33 +08:00
|
|
|
|
2009-04-27 01:57:12 +08:00
|
|
|
static std::string
|
2009-11-13 13:51:54 +08:00
|
|
|
SimplifyConstraint(const char *Constraint, const TargetInfo &Target,
|
2014-05-21 13:09:00 +08:00
|
|
|
SmallVectorImpl<TargetInfo::ConstraintInfo> *OutCons=nullptr) {
|
2008-02-06 00:35:33 +08:00
|
|
|
std::string Result;
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
while (*Constraint) {
|
|
|
|
switch (*Constraint) {
|
|
|
|
default:
|
2011-06-08 07:45:05 +08:00
|
|
|
Result += Target.convertConstraint(Constraint);
|
2008-02-06 00:35:33 +08:00
|
|
|
break;
|
|
|
|
// Ignore these
|
|
|
|
case '*':
|
|
|
|
case '?':
|
|
|
|
case '!':
|
2010-08-11 03:20:14 +08:00
|
|
|
case '=': // Will see this and the following in mult-alt constraints.
|
|
|
|
case '+':
|
|
|
|
break;
|
2012-10-29 20:20:54 +08:00
|
|
|
case '#': // Ignore the rest of the constraint alternative.
|
|
|
|
while (Constraint[1] && Constraint[1] != ',')
|
2013-07-11 04:14:36 +08:00
|
|
|
Constraint++;
|
2012-10-29 20:20:54 +08:00
|
|
|
break;
|
2015-01-11 17:09:01 +08:00
|
|
|
case '&':
|
2015-01-11 17:13:56 +08:00
|
|
|
case '%':
|
|
|
|
Result += *Constraint;
|
|
|
|
while (Constraint[1] && Constraint[1] == *Constraint)
|
2015-01-11 17:09:01 +08:00
|
|
|
Constraint++;
|
|
|
|
break;
|
2010-09-18 09:15:13 +08:00
|
|
|
case ',':
|
|
|
|
Result += "|";
|
2008-02-06 00:35:33 +08:00
|
|
|
break;
|
|
|
|
case 'g':
|
|
|
|
Result += "imr";
|
|
|
|
break;
|
2009-01-18 10:06:20 +08:00
|
|
|
case '[': {
|
2009-04-27 01:57:12 +08:00
|
|
|
assert(OutCons &&
|
2009-01-18 10:06:20 +08:00
|
|
|
"Must pass output names to constraints with a symbolic name");
|
|
|
|
unsigned Index;
|
2015-10-21 10:34:10 +08:00
|
|
|
bool result = Target.resolveSymbolicName(Constraint, *OutCons, Index);
|
2011-01-06 02:41:53 +08:00
|
|
|
assert(result && "Could not resolve symbolic name"); (void)result;
|
2009-01-18 10:06:20 +08:00
|
|
|
Result += llvm::utostr(Index);
|
|
|
|
break;
|
|
|
|
}
|
2008-02-06 00:35:33 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
Constraint++;
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
2011-01-02 05:12:33 +08:00
|
|
|
/// AddVariableConstraints - Look at AsmExpr and if it is a variable declared
|
|
|
|
/// as using a particular register add that as a constraint that will be used
|
|
|
|
/// in this asm stmt.
|
2010-12-31 06:59:32 +08:00
|
|
|
static std::string
|
2011-01-02 05:12:33 +08:00
|
|
|
AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr,
|
|
|
|
const TargetInfo &Target, CodeGenModule &CGM,
|
2020-10-14 14:48:29 +08:00
|
|
|
const AsmStmt &Stmt, const bool EarlyClobber,
|
|
|
|
std::string *GCCReg = nullptr) {
|
2010-12-31 06:59:32 +08:00
|
|
|
const DeclRefExpr *AsmDeclRef = dyn_cast<DeclRefExpr>(&AsmExpr);
|
|
|
|
if (!AsmDeclRef)
|
|
|
|
return Constraint;
|
|
|
|
const ValueDecl &Value = *AsmDeclRef->getDecl();
|
|
|
|
const VarDecl *Variable = dyn_cast<VarDecl>(&Value);
|
|
|
|
if (!Variable)
|
|
|
|
return Constraint;
|
2021-01-05 06:17:45 +08:00
|
|
|
if (Variable->getStorageClass() != SC_Register)
|
2012-03-16 07:12:51 +08:00
|
|
|
return Constraint;
|
2010-12-31 06:59:32 +08:00
|
|
|
AsmLabelAttr *Attr = Variable->getAttr<AsmLabelAttr>();
|
|
|
|
if (!Attr)
|
|
|
|
return Constraint;
|
2011-07-23 18:55:15 +08:00
|
|
|
StringRef Register = Attr->getLabel();
|
2011-01-02 05:47:03 +08:00
|
|
|
assert(Target.isValidGCCRegisterName(Register));
|
2011-06-17 09:53:34 +08:00
|
|
|
// We're using validateOutputConstraint here because we only care if
|
|
|
|
// this is a register constraint.
|
|
|
|
TargetInfo::ConstraintInfo Info(Constraint, "");
|
|
|
|
if (Target.validateOutputConstraint(Info) &&
|
|
|
|
!Info.allowsRegister()) {
|
2010-12-31 06:59:32 +08:00
|
|
|
CGM.ErrorUnsupported(&Stmt, "__asm__");
|
|
|
|
return Constraint;
|
|
|
|
}
|
2011-06-21 08:07:10 +08:00
|
|
|
// Canonicalize the register here before returning it.
|
|
|
|
Register = Target.getNormalizedGCCRegisterName(Register);
|
2020-10-14 14:48:29 +08:00
|
|
|
if (GCCReg != nullptr)
|
|
|
|
*GCCReg = Register.str();
|
2015-02-04 22:25:47 +08:00
|
|
|
return (EarlyClobber ? "&{" : "{") + Register.str() + "}";
|
2010-12-31 06:59:32 +08:00
|
|
|
}
|
|
|
|
|
2022-01-05 22:16:24 +08:00
|
|
|
std::pair<llvm::Value*, llvm::Type *> CodeGenFunction::EmitAsmInputLValue(
|
|
|
|
const TargetInfo::ConstraintInfo &Info, LValue InputValue,
|
|
|
|
QualType InputType, std::string &ConstraintStr, SourceLocation Loc) {
|
2009-09-09 23:08:12 +08:00
|
|
|
if (Info.allowsRegister() || !Info.allowsMemory()) {
|
2022-01-05 22:16:24 +08:00
|
|
|
if (CodeGenFunction::hasScalarEvaluationKind(InputType))
|
|
|
|
return {EmitLoadOfLValue(InputValue, Loc).getScalarVal(), nullptr};
|
|
|
|
|
|
|
|
llvm::Type *Ty = ConvertType(InputType);
|
|
|
|
uint64_t Size = CGM.getDataLayout().getTypeSizeInBits(Ty);
|
|
|
|
if ((Size <= 64 && llvm::isPowerOf2_64(Size)) ||
|
|
|
|
getTargetHooks().isScalarizableAsmOperand(*this, Ty)) {
|
|
|
|
Ty = llvm::IntegerType::get(getLLVMContext(), Size);
|
|
|
|
|
2022-02-14 21:18:04 +08:00
|
|
|
return {Builder.CreateLoad(Builder.CreateElementBitCast(
|
|
|
|
InputValue.getAddress(*this), Ty)),
|
2022-01-05 22:16:24 +08:00
|
|
|
nullptr};
|
2009-01-12 03:32:54 +08:00
|
|
|
}
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2022-01-05 22:16:24 +08:00
|
|
|
Address Addr = InputValue.getAddress(*this);
|
|
|
|
ConstraintStr += '*';
|
|
|
|
return {Addr.getPointer(), Addr.getElementType()};
|
2009-01-12 03:32:54 +08:00
|
|
|
}
|
|
|
|
|
2022-01-05 22:16:24 +08:00
|
|
|
std::pair<llvm::Value *, llvm::Type *>
|
|
|
|
CodeGenFunction::EmitAsmInput(const TargetInfo::ConstraintInfo &Info,
|
|
|
|
const Expr *InputExpr,
|
|
|
|
std::string &ConstraintStr) {
|
2015-06-12 02:19:34 +08:00
|
|
|
// If this can't be a register or memory, i.e., has to be a constant
|
|
|
|
// (immediate or symbolic), try to emit it as such.
|
|
|
|
if (!Info.allowsRegister() && !Info.allowsMemory()) {
|
2018-12-19 06:54:03 +08:00
|
|
|
if (Info.requiresImmediateConstant()) {
|
2019-03-06 18:26:19 +08:00
|
|
|
Expr::EvalResult EVResult;
|
|
|
|
InputExpr->EvaluateAsRValue(EVResult, getContext(), true);
|
|
|
|
|
|
|
|
llvm::APSInt IntResult;
|
Delay diagnosing asm constraints that require immediates until after inlining
Summary:
An inline asm call may result in an immediate input value after inlining.
Therefore, don't emit a diagnostic here if the input isn't an immediate.
Reviewers: joerg, eli.friedman, rsmith
Subscribers: asb, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, jrtc27, zzheng, edward-jones, rogfer01, MartinMosbeck, brucehoult, the_o, PkmX, jocewei, s.egerton, krytarowski, mgorny, riccibruno, eraman, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D60943
llvm-svn: 368104
2019-08-07 06:41:22 +08:00
|
|
|
if (EVResult.Val.toIntegralConstant(IntResult, InputExpr->getType(),
|
|
|
|
getContext()))
|
2022-01-05 22:16:24 +08:00
|
|
|
return {llvm::ConstantInt::get(getLLVMContext(), IntResult), nullptr};
|
2018-12-19 06:54:03 +08:00
|
|
|
}
|
|
|
|
|
2018-12-01 07:41:18 +08:00
|
|
|
Expr::EvalResult Result;
|
2015-06-13 09:16:10 +08:00
|
|
|
if (InputExpr->EvaluateAsInt(Result, getContext()))
|
2022-01-05 22:16:24 +08:00
|
|
|
return {llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt()),
|
|
|
|
nullptr};
|
2015-06-12 02:19:34 +08:00
|
|
|
}
|
|
|
|
|
2010-07-16 08:55:21 +08:00
|
|
|
if (Info.allowsRegister() || !Info.allowsMemory())
|
2013-03-08 05:37:08 +08:00
|
|
|
if (CodeGenFunction::hasScalarEvaluationKind(InputExpr->getType()))
|
2022-01-05 22:16:24 +08:00
|
|
|
return {EmitScalarExpr(InputExpr), nullptr};
|
2015-12-15 22:04:18 +08:00
|
|
|
if (InputExpr->getStmtClass() == Expr::CXXThisExprClass)
|
2022-01-05 22:16:24 +08:00
|
|
|
return {EmitScalarExpr(InputExpr), nullptr};
|
2010-07-16 08:55:21 +08:00
|
|
|
InputExpr = InputExpr->IgnoreParenNoopCasts(getContext());
|
|
|
|
LValue Dest = EmitLValue(InputExpr);
|
2013-10-02 10:29:49 +08:00
|
|
|
return EmitAsmInputLValue(Info, Dest, InputExpr->getType(), ConstraintStr,
|
|
|
|
InputExpr->getExprLoc());
|
2010-07-16 08:55:21 +08:00
|
|
|
}
|
|
|
|
|
2010-11-17 13:58:54 +08:00
|
|
|
/// getAsmSrcLocInfo - Return the !srcloc metadata node to attach to an inline
|
2010-11-17 16:25:26 +08:00
|
|
|
/// asm call instruction. The !srcloc MDNode contains a list of constant
|
|
|
|
/// integers which are the source locations of the start of each line in the
|
|
|
|
/// asm.
|
2010-11-17 13:58:54 +08:00
|
|
|
static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
|
|
|
|
CodeGenFunction &CGF) {
|
2014-12-10 02:39:32 +08:00
|
|
|
SmallVector<llvm::Metadata *, 8> Locs;
|
2010-11-17 16:25:26 +08:00
|
|
|
// Add the location of the first line to the MDNode.
|
2014-12-10 02:39:32 +08:00
|
|
|
Locs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
|
[clang] Use i64 for the !srcloc metadata on asm IR nodes.
This is part of a patch series working towards the ability to make
SourceLocation into a 64-bit type to handle larger translation units.
!srcloc is generated in clang codegen, and pulled back out by llvm
functions like AsmPrinter::emitInlineAsm that need to report errors in
the inline asm. From there it goes to LLVMContext::emitError, is
stored in DiagnosticInfoInlineAsm, and ends up back in clang, at
BackendConsumer::InlineAsmDiagHandler(), which reconstitutes a true
clang::SourceLocation from the integer cookie.
Throughout this code path, it's now 64-bit rather than 32, which means
that if SourceLocation is expanded to a 64-bit type, this error report
won't lose half of the data.
The compiler will tolerate both of i32 and i64 !srcloc metadata in
input IR without faulting. Test added in llvm/MC. (The semantic
accuracy of the metadata is another matter, but I don't know of any
situation where that matters: if you're reading an IR file written by
a previous run of clang, you don't have the SourceManager that can
relate those source locations back to the original source files.)
Original version of the patch by Mikhail Maltsev.
Reviewed By: dexonsmith
Differential Revision: https://reviews.llvm.org/D105491
2021-07-22 17:08:06 +08:00
|
|
|
CGF.Int64Ty, Str->getBeginLoc().getRawEncoding())));
|
2011-07-23 18:55:15 +08:00
|
|
|
StringRef StrVal = Str->getString();
|
2010-11-17 16:25:26 +08:00
|
|
|
if (!StrVal.empty()) {
|
|
|
|
const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
|
2012-03-11 15:00:24 +08:00
|
|
|
const LangOptions &LangOpts = CGF.CGM.getLangOpts();
|
2015-12-10 09:11:47 +08:00
|
|
|
unsigned StartToken = 0;
|
|
|
|
unsigned ByteOffset = 0;
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2010-11-17 16:25:26 +08:00
|
|
|
// Add the location of the start of each subsequent line of the asm to the
|
|
|
|
// MDNode.
|
2015-12-10 09:11:47 +08:00
|
|
|
for (unsigned i = 0, e = StrVal.size() - 1; i != e; ++i) {
|
2010-11-17 16:25:26 +08:00
|
|
|
if (StrVal[i] != '\n') continue;
|
2015-12-10 09:11:47 +08:00
|
|
|
SourceLocation LineLoc = Str->getLocationOfByte(
|
|
|
|
i + 1, SM, LangOpts, CGF.getTarget(), &StartToken, &ByteOffset);
|
2014-12-10 02:39:32 +08:00
|
|
|
Locs.push_back(llvm::ConstantAsMetadata::get(
|
[clang] Use i64 for the !srcloc metadata on asm IR nodes.
This is part of a patch series working towards the ability to make
SourceLocation into a 64-bit type to handle larger translation units.
!srcloc is generated in clang codegen, and pulled back out by llvm
functions like AsmPrinter::emitInlineAsm that need to report errors in
the inline asm. From there it goes to LLVMContext::emitError, is
stored in DiagnosticInfoInlineAsm, and ends up back in clang, at
BackendConsumer::InlineAsmDiagHandler(), which reconstitutes a true
clang::SourceLocation from the integer cookie.
Throughout this code path, it's now 64-bit rather than 32, which means
that if SourceLocation is expanded to a 64-bit type, this error report
won't lose half of the data.
The compiler will tolerate both of i32 and i64 !srcloc metadata in
input IR without faulting. Test added in llvm/MC. (The semantic
accuracy of the metadata is another matter, but I don't know of any
situation where that matters: if you're reading an IR file written by
a previous run of clang, you don't have the SourceManager that can
relate those source locations back to the original source files.)
Original version of the patch by Mikhail Maltsev.
Reviewed By: dexonsmith
Differential Revision: https://reviews.llvm.org/D105491
2021-07-22 17:08:06 +08:00
|
|
|
llvm::ConstantInt::get(CGF.Int64Ty, LineLoc.getRawEncoding())));
|
2010-11-17 16:25:26 +08:00
|
|
|
}
|
2012-06-21 01:43:05 +08:00
|
|
|
}
|
|
|
|
|
2011-04-22 03:59:12 +08:00
|
|
|
return llvm::MDNode::get(CGF.getLLVMContext(), Locs);
|
2010-11-17 13:58:54 +08:00
|
|
|
}
|
|
|
|
|
2019-06-03 23:57:25 +08:00
|
|
|
static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
|
2021-05-14 02:05:11 +08:00
|
|
|
bool HasUnwindClobber, bool ReadOnly,
|
|
|
|
bool ReadNone, bool NoMerge, const AsmStmt &S,
|
2019-06-03 23:57:25 +08:00
|
|
|
const std::vector<llvm::Type *> &ResultRegTypes,
|
2022-01-05 22:16:24 +08:00
|
|
|
const std::vector<llvm::Type *> &ArgElemTypes,
|
2019-06-03 23:57:25 +08:00
|
|
|
CodeGenFunction &CGF,
|
|
|
|
std::vector<llvm::Value *> &RegResults) {
|
2021-05-14 02:05:11 +08:00
|
|
|
if (!HasUnwindClobber)
|
2021-08-18 11:25:16 +08:00
|
|
|
Result.addFnAttr(llvm::Attribute::NoUnwind);
|
2021-05-14 02:05:11 +08:00
|
|
|
|
2020-07-21 17:06:11 +08:00
|
|
|
if (NoMerge)
|
2021-08-18 11:25:16 +08:00
|
|
|
Result.addFnAttr(llvm::Attribute::NoMerge);
|
2019-06-03 23:57:25 +08:00
|
|
|
// Attach readnone and readonly attributes.
|
|
|
|
if (!HasSideEffect) {
|
|
|
|
if (ReadNone)
|
2021-08-18 11:25:16 +08:00
|
|
|
Result.addFnAttr(llvm::Attribute::ReadNone);
|
2019-06-03 23:57:25 +08:00
|
|
|
else if (ReadOnly)
|
2021-08-18 11:25:16 +08:00
|
|
|
Result.addFnAttr(llvm::Attribute::ReadOnly);
|
2019-06-03 23:57:25 +08:00
|
|
|
}
|
|
|
|
|
2022-01-05 22:16:24 +08:00
|
|
|
// Add elementtype attribute for indirect constraints.
|
|
|
|
for (auto Pair : llvm::enumerate(ArgElemTypes)) {
|
|
|
|
if (Pair.value()) {
|
|
|
|
auto Attr = llvm::Attribute::get(
|
|
|
|
CGF.getLLVMContext(), llvm::Attribute::ElementType, Pair.value());
|
|
|
|
Result.addParamAttr(Pair.index(), Attr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-06-03 23:57:25 +08:00
|
|
|
// Slap the source location of the inline asm into a !srcloc metadata on the
|
|
|
|
// call.
|
|
|
|
if (const auto *gccAsmStmt = dyn_cast<GCCAsmStmt>(&S))
|
|
|
|
Result.setMetadata("srcloc",
|
|
|
|
getAsmSrcLocInfo(gccAsmStmt->getAsmString(), CGF));
|
|
|
|
else {
|
|
|
|
// At least put the line number on MS inline asm blobs.
|
[clang] Use i64 for the !srcloc metadata on asm IR nodes.
This is part of a patch series working towards the ability to make
SourceLocation into a 64-bit type to handle larger translation units.
!srcloc is generated in clang codegen, and pulled back out by llvm
functions like AsmPrinter::emitInlineAsm that need to report errors in
the inline asm. From there it goes to LLVMContext::emitError, is
stored in DiagnosticInfoInlineAsm, and ends up back in clang, at
BackendConsumer::InlineAsmDiagHandler(), which reconstitutes a true
clang::SourceLocation from the integer cookie.
Throughout this code path, it's now 64-bit rather than 32, which means
that if SourceLocation is expanded to a 64-bit type, this error report
won't lose half of the data.
The compiler will tolerate both of i32 and i64 !srcloc metadata in
input IR without faulting. Test added in llvm/MC. (The semantic
accuracy of the metadata is another matter, but I don't know of any
situation where that matters: if you're reading an IR file written by
a previous run of clang, you don't have the SourceManager that can
relate those source locations back to the original source files.)
Original version of the patch by Mikhail Maltsev.
Reviewed By: dexonsmith
Differential Revision: https://reviews.llvm.org/D105491
2021-07-22 17:08:06 +08:00
|
|
|
llvm::Constant *Loc =
|
|
|
|
llvm::ConstantInt::get(CGF.Int64Ty, S.getAsmLoc().getRawEncoding());
|
2019-06-03 23:57:25 +08:00
|
|
|
Result.setMetadata("srcloc",
|
|
|
|
llvm::MDNode::get(CGF.getLLVMContext(),
|
|
|
|
llvm::ConstantAsMetadata::get(Loc)));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (CGF.getLangOpts().assumeFunctionsAreConvergent())
|
|
|
|
// Conservatively, mark all inline asm blocks in CUDA or OpenCL as
|
|
|
|
// convergent (meaning, they may call an intrinsically convergent op, such
|
|
|
|
// as bar.sync, and so can't have certain optimizations applied around
|
|
|
|
// them).
|
2021-08-18 11:25:16 +08:00
|
|
|
Result.addFnAttr(llvm::Attribute::Convergent);
|
2019-06-03 23:57:25 +08:00
|
|
|
// Extract all of the register value results from the asm.
|
|
|
|
if (ResultRegTypes.size() == 1) {
|
|
|
|
RegResults.push_back(&Result);
|
|
|
|
} else {
|
|
|
|
for (unsigned i = 0, e = ResultRegTypes.size(); i != e; ++i) {
|
|
|
|
llvm::Value *Tmp = CGF.Builder.CreateExtractValue(&Result, i, "asmresult");
|
|
|
|
RegResults.push_back(Tmp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-08-29 02:54:39 +08:00
|
|
|
void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
|
2022-05-14 05:24:27 +08:00
|
|
|
// Pop all cleanup blocks at the end of the asm statement.
|
|
|
|
CodeGenFunction::RunCleanupsScope Cleanups(*this);
|
|
|
|
|
2012-08-25 01:05:45 +08:00
|
|
|
// Assemble the final asm string.
|
2012-08-28 04:23:31 +08:00
|
|
|
std::string AsmString = S.generateAsmString(getContext());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-05-03 15:05:00 +08:00
|
|
|
// Get all the output and input constraints together.
|
2011-07-23 18:55:15 +08:00
|
|
|
SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
|
|
|
|
SmallVector<TargetInfo::ConstraintInfo, 4> InputConstraintInfos;
|
2009-05-03 15:05:00 +08:00
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
|
2013-05-03 08:10:13 +08:00
|
|
|
StringRef Name;
|
|
|
|
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
|
|
|
|
Name = GAS->getOutputName(i);
|
|
|
|
TargetInfo::ConstraintInfo Info(S.getOutputConstraint(i), Name);
|
2013-04-17 06:48:15 +08:00
|
|
|
bool IsValid = getTarget().validateOutputConstraint(Info); (void)IsValid;
|
2013-11-22 18:20:40 +08:00
|
|
|
assert(IsValid && "Failed to parse output constraint");
|
2009-05-03 15:05:00 +08:00
|
|
|
OutputConstraintInfos.push_back(Info);
|
2009-09-09 23:08:12 +08:00
|
|
|
}
|
|
|
|
|
2009-05-03 15:05:00 +08:00
|
|
|
for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
|
2013-05-03 08:10:13 +08:00
|
|
|
StringRef Name;
|
|
|
|
if (const GCCAsmStmt *GAS = dyn_cast<GCCAsmStmt>(&S))
|
|
|
|
Name = GAS->getInputName(i);
|
|
|
|
TargetInfo::ConstraintInfo Info(S.getInputConstraint(i), Name);
|
2013-04-17 06:48:15 +08:00
|
|
|
bool IsValid =
|
2015-10-21 10:34:10 +08:00
|
|
|
getTarget().validateInputConstraint(OutputConstraintInfos, Info);
|
2010-03-04 05:52:23 +08:00
|
|
|
assert(IsValid && "Failed to parse input constraint"); (void)IsValid;
|
2009-05-03 15:05:00 +08:00
|
|
|
InputConstraintInfos.push_back(Info);
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
std::string Constraints;
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-05-03 15:53:25 +08:00
|
|
|
std::vector<LValue> ResultRegDests;
|
|
|
|
std::vector<QualType> ResultRegQualTys;
|
2011-07-11 17:56:20 +08:00
|
|
|
std::vector<llvm::Type *> ResultRegTypes;
|
|
|
|
std::vector<llvm::Type *> ResultTruncRegTypes;
|
2012-05-02 03:53:37 +08:00
|
|
|
std::vector<llvm::Type *> ArgTypes;
|
2022-01-05 22:16:24 +08:00
|
|
|
std::vector<llvm::Type *> ArgElemTypes;
|
2008-02-06 00:35:33 +08:00
|
|
|
std::vector<llvm::Value*> Args;
|
2019-08-29 19:21:41 +08:00
|
|
|
llvm::BitVector ResultTypeRequiresCast;
|
2008-02-06 04:01:53 +08:00
|
|
|
|
|
|
|
// Keep track of inout constraints.
|
|
|
|
std::string InOutConstraints;
|
|
|
|
std::vector<llvm::Value*> InOutArgs;
|
2011-07-10 01:41:47 +08:00
|
|
|
std::vector<llvm::Type*> InOutArgTypes;
|
2022-01-05 22:16:24 +08:00
|
|
|
std::vector<llvm::Type*> InOutArgElemTypes;
|
2009-01-28 04:38:24 +08:00
|
|
|
|
2019-03-15 03:46:51 +08:00
|
|
|
// Keep track of out constraints for tied input operand.
|
|
|
|
std::vector<std::string> OutputConstraints;
|
|
|
|
|
2020-10-14 14:48:29 +08:00
|
|
|
// Keep track of defined physregs.
|
|
|
|
llvm::SmallSet<std::string, 8> PhysRegOutputs;
|
|
|
|
|
2015-07-11 02:44:40 +08:00
|
|
|
// An inline asm can be marked readonly if it meets the following conditions:
|
|
|
|
// - it doesn't have any sideeffects
|
|
|
|
// - it doesn't clobber memory
|
|
|
|
// - it doesn't return a value by-reference
|
|
|
|
// It can be marked readnone if it doesn't have any input memory constraints
|
|
|
|
// in addition to meeting the conditions listed above.
|
|
|
|
bool ReadOnly = true, ReadNone = true;
|
|
|
|
|
2009-09-09 23:08:12 +08:00
|
|
|
for (unsigned i = 0, e = S.getNumOutputs(); i != e; i++) {
|
2009-05-03 15:05:00 +08:00
|
|
|
TargetInfo::ConstraintInfo &Info = OutputConstraintInfos[i];
|
2009-01-28 04:38:24 +08:00
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
// Simplify the output constraint.
|
2009-05-03 15:05:00 +08:00
|
|
|
std::string OutputConstraint(S.getOutputConstraint(i));
|
2013-04-17 06:48:15 +08:00
|
|
|
OutputConstraint = SimplifyConstraint(OutputConstraint.c_str() + 1,
|
2018-03-24 03:43:42 +08:00
|
|
|
getTarget(), &OutputConstraintInfos);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-03-14 01:38:01 +08:00
|
|
|
const Expr *OutExpr = S.getOutputExpr(i);
|
|
|
|
OutExpr = OutExpr->IgnoreParenNoopCasts(getContext());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2020-10-14 14:48:29 +08:00
|
|
|
std::string GCCReg;
|
2011-06-03 22:52:25 +08:00
|
|
|
OutputConstraint = AddVariableConstraints(OutputConstraint, *OutExpr,
|
2015-02-04 22:25:47 +08:00
|
|
|
getTarget(), CGM, S,
|
2020-10-14 14:48:29 +08:00
|
|
|
Info.earlyClobber(),
|
|
|
|
&GCCReg);
|
|
|
|
// Give an error on multiple outputs to same physreg.
|
|
|
|
if (!GCCReg.empty() && !PhysRegOutputs.insert(GCCReg).second)
|
|
|
|
CGM.Error(S.getAsmLoc(), "multiple outputs to hard register: " + GCCReg);
|
|
|
|
|
2019-03-15 03:46:51 +08:00
|
|
|
OutputConstraints.push_back(OutputConstraint);
|
2009-03-14 01:38:01 +08:00
|
|
|
LValue Dest = EmitLValue(OutExpr);
|
2009-05-03 15:53:25 +08:00
|
|
|
if (!Constraints.empty())
|
2009-05-01 08:16:04 +08:00
|
|
|
Constraints += ',';
|
|
|
|
|
2009-05-03 16:21:20 +08:00
|
|
|
// If this is a register output, then make the inline asm return it
|
|
|
|
// by-value. If this is a memory result, return the value by-reference.
|
2021-07-28 23:40:59 +08:00
|
|
|
QualType QTy = OutExpr->getType();
|
|
|
|
const bool IsScalarOrAggregate = hasScalarEvaluationKind(QTy) ||
|
|
|
|
hasAggregateEvaluationKind(QTy);
|
|
|
|
if (!Info.allowsMemory() && IsScalarOrAggregate) {
|
|
|
|
|
2009-05-03 16:21:20 +08:00
|
|
|
Constraints += "=" + OutputConstraint;
|
2021-07-28 23:40:59 +08:00
|
|
|
ResultRegQualTys.push_back(QTy);
|
2009-05-03 15:53:25 +08:00
|
|
|
ResultRegDests.push_back(Dest);
|
2021-07-28 23:40:59 +08:00
|
|
|
|
|
|
|
llvm::Type *Ty = ConvertTypeForMem(QTy);
|
|
|
|
const bool RequiresCast = Info.allowsRegister() &&
|
|
|
|
(getTargetHooks().isScalarizableAsmOperand(*this, Ty) ||
|
|
|
|
Ty->isAggregateType());
|
|
|
|
|
|
|
|
ResultTruncRegTypes.push_back(Ty);
|
|
|
|
ResultTypeRequiresCast.push_back(RequiresCast);
|
|
|
|
|
|
|
|
if (RequiresCast) {
|
|
|
|
unsigned Size = getContext().getTypeSize(QTy);
|
|
|
|
Ty = llvm::IntegerType::get(getLLVMContext(), Size);
|
2019-08-29 19:21:41 +08:00
|
|
|
}
|
2021-07-28 23:40:59 +08:00
|
|
|
ResultRegTypes.push_back(Ty);
|
2009-05-03 16:21:20 +08:00
|
|
|
// If this output is tied to an input, and if the input is larger, then
|
|
|
|
// we need to set the actual result type of the inline asm node to be the
|
|
|
|
// same as the input type.
|
|
|
|
if (Info.hasMatchingInput()) {
|
2009-05-03 16:38:58 +08:00
|
|
|
unsigned InputNo;
|
|
|
|
for (InputNo = 0; InputNo != S.getNumInputs(); ++InputNo) {
|
|
|
|
TargetInfo::ConstraintInfo &Input = InputConstraintInfos[InputNo];
|
2010-04-24 01:27:29 +08:00
|
|
|
if (Input.hasTiedOperand() && Input.getTiedOperand() == i)
|
2009-05-03 16:21:20 +08:00
|
|
|
break;
|
2009-05-03 16:38:58 +08:00
|
|
|
}
|
|
|
|
assert(InputNo != S.getNumInputs() && "Didn't find matching input!");
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-05-03 16:21:20 +08:00
|
|
|
QualType InputTy = S.getInputExpr(InputNo)->getType();
|
2010-04-24 01:27:29 +08:00
|
|
|
QualType OutputType = OutExpr->getType();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-05-03 16:21:20 +08:00
|
|
|
uint64_t InputSize = getContext().getTypeSize(InputTy);
|
2010-04-24 01:27:29 +08:00
|
|
|
if (getContext().getTypeSize(OutputType) < InputSize) {
|
|
|
|
// Form the asm to return the value as a larger integer or fp type.
|
|
|
|
ResultRegTypes.back() = ConvertType(InputTy);
|
2009-05-03 16:21:20 +08:00
|
|
|
}
|
|
|
|
}
|
2013-06-07 08:04:50 +08:00
|
|
|
if (llvm::Type* AdjTy =
|
2011-02-20 07:03:58 +08:00
|
|
|
getTargetHooks().adjustInlineAsmType(*this, OutputConstraint,
|
|
|
|
ResultRegTypes.back()))
|
2010-10-30 07:12:32 +08:00
|
|
|
ResultRegTypes.back() = AdjTy;
|
2013-06-07 08:04:50 +08:00
|
|
|
else {
|
|
|
|
CGM.getDiags().Report(S.getAsmLoc(),
|
|
|
|
diag::err_asm_invalid_type_in_input)
|
|
|
|
<< OutExpr->getType() << OutputConstraint;
|
|
|
|
}
|
2018-08-15 04:21:05 +08:00
|
|
|
|
|
|
|
// Update largest vector width for any vector types.
|
|
|
|
if (auto *VT = dyn_cast<llvm::VectorType>(ResultRegTypes.back()))
|
2020-03-17 18:27:29 +08:00
|
|
|
LargestVectorWidth =
|
|
|
|
std::max((uint64_t)LargestVectorWidth,
|
|
|
|
VT->getPrimitiveSizeInBits().getKnownMinSize());
|
2008-02-06 00:35:33 +08:00
|
|
|
} else {
|
2022-01-05 22:16:24 +08:00
|
|
|
Address DestAddr = Dest.getAddress(*this);
|
2020-11-18 19:32:45 +08:00
|
|
|
// Matrix types in memory are represented by arrays, but accessed through
|
|
|
|
// vector pointers, with the alignment specified on the access operation.
|
|
|
|
// For inline assembly, update pointer arguments to use vector pointers.
|
|
|
|
// Otherwise there will be a mis-match if the matrix is also an
|
|
|
|
// input-argument which is represented as vector.
|
2022-01-05 22:16:24 +08:00
|
|
|
if (isa<MatrixType>(OutExpr->getType().getCanonicalType()))
|
|
|
|
DestAddr = Builder.CreateElementBitCast(
|
|
|
|
DestAddr, ConvertType(OutExpr->getType()));
|
|
|
|
|
|
|
|
ArgTypes.push_back(DestAddr.getType());
|
|
|
|
ArgElemTypes.push_back(DestAddr.getElementType());
|
|
|
|
Args.push_back(DestAddr.getPointer());
|
2008-02-06 04:01:53 +08:00
|
|
|
Constraints += "=*";
|
2008-02-06 00:35:33 +08:00
|
|
|
Constraints += OutputConstraint;
|
2015-07-11 02:44:40 +08:00
|
|
|
ReadOnly = ReadNone = false;
|
2008-02-06 04:01:53 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-04-26 15:16:29 +08:00
|
|
|
if (Info.isReadWrite()) {
|
2008-02-06 04:01:53 +08:00
|
|
|
InOutConstraints += ',';
|
2009-01-12 03:32:54 +08:00
|
|
|
|
2009-08-05 02:18:36 +08:00
|
|
|
const Expr *InputExpr = S.getOutputExpr(i);
|
2022-01-05 22:16:24 +08:00
|
|
|
llvm::Value *Arg;
|
|
|
|
llvm::Type *ArgElemType;
|
|
|
|
std::tie(Arg, ArgElemType) = EmitAsmInputLValue(
|
|
|
|
Info, Dest, InputExpr->getType(), InOutConstraints,
|
|
|
|
InputExpr->getExprLoc());
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-03-23 07:25:07 +08:00
|
|
|
if (llvm::Type* AdjTy =
|
2013-06-07 08:04:50 +08:00
|
|
|
getTargetHooks().adjustInlineAsmType(*this, OutputConstraint,
|
|
|
|
Arg->getType()))
|
2012-03-23 07:25:07 +08:00
|
|
|
Arg = Builder.CreateBitCast(Arg, AdjTy);
|
|
|
|
|
2018-08-15 04:21:05 +08:00
|
|
|
// Update largest vector width for any vector types.
|
|
|
|
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
|
2020-03-17 18:27:29 +08:00
|
|
|
LargestVectorWidth =
|
|
|
|
std::max((uint64_t)LargestVectorWidth,
|
|
|
|
VT->getPrimitiveSizeInBits().getKnownMinSize());
|
2020-10-14 14:48:29 +08:00
|
|
|
// Only tie earlyclobber physregs.
|
|
|
|
if (Info.allowsRegister() && (GCCReg.empty() || Info.earlyClobber()))
|
2009-01-12 05:23:27 +08:00
|
|
|
InOutConstraints += llvm::utostr(i);
|
|
|
|
else
|
|
|
|
InOutConstraints += OutputConstraint;
|
2009-01-12 03:46:50 +08:00
|
|
|
|
2009-08-05 02:18:36 +08:00
|
|
|
InOutArgTypes.push_back(Arg->getType());
|
2022-01-05 22:16:24 +08:00
|
|
|
InOutArgElemTypes.push_back(ArgElemType);
|
2009-08-05 02:18:36 +08:00
|
|
|
InOutArgs.push_back(Arg);
|
2008-02-06 04:01:53 +08:00
|
|
|
}
|
2008-02-06 00:35:33 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-09-05 04:04:38 +08:00
|
|
|
// If this is a Microsoft-style asm blob, store the return registers (EAX:EDX)
|
|
|
|
// to the return value slot. Only do this when returning in registers.
|
|
|
|
if (isa<MSAsmStmt>(&S)) {
|
|
|
|
const ABIArgInfo &RetAI = CurFnInfo->getReturnInfo();
|
|
|
|
if (RetAI.isDirect() || RetAI.isExtend()) {
|
|
|
|
// Make a fake lvalue for the return value slot.
|
2021-12-15 09:40:33 +08:00
|
|
|
LValue ReturnSlot = MakeAddrLValueWithoutTBAA(ReturnValue, FnRetTy);
|
2014-09-05 04:04:38 +08:00
|
|
|
CGM.getTargetCodeGenInfo().addReturnRegisterOutputs(
|
|
|
|
*this, ReturnSlot, Constraints, ResultRegTypes, ResultTruncRegTypes,
|
|
|
|
ResultRegDests, AsmString, S.getNumOutputs());
|
|
|
|
SawAsmBlock = true;
|
|
|
|
}
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
for (unsigned i = 0, e = S.getNumInputs(); i != e; i++) {
|
|
|
|
const Expr *InputExpr = S.getInputExpr(i);
|
|
|
|
|
2009-05-03 15:05:00 +08:00
|
|
|
TargetInfo::ConstraintInfo &Info = InputConstraintInfos[i];
|
|
|
|
|
2015-07-11 02:44:40 +08:00
|
|
|
if (Info.allowsMemory())
|
|
|
|
ReadNone = false;
|
|
|
|
|
2009-05-03 15:53:25 +08:00
|
|
|
if (!Constraints.empty())
|
2008-02-06 00:35:33 +08:00
|
|
|
Constraints += ',';
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
// Simplify the input constraint.
|
2009-05-03 15:05:00 +08:00
|
|
|
std::string InputConstraint(S.getInputConstraint(i));
|
2013-04-17 06:48:15 +08:00
|
|
|
InputConstraint = SimplifyConstraint(InputConstraint.c_str(), getTarget(),
|
2009-04-27 01:57:12 +08:00
|
|
|
&OutputConstraintInfos);
|
2008-02-06 00:35:33 +08:00
|
|
|
|
2015-02-07 02:44:18 +08:00
|
|
|
InputConstraint = AddVariableConstraints(
|
|
|
|
InputConstraint, *InputExpr->IgnoreParenNoopCasts(getContext()),
|
|
|
|
getTarget(), CGM, S, false /* No EarlyClobber */);
|
2010-12-31 06:59:32 +08:00
|
|
|
|
2019-03-15 03:46:51 +08:00
|
|
|
std::string ReplaceConstraint (InputConstraint);
|
2022-01-05 22:16:24 +08:00
|
|
|
llvm::Value *Arg;
|
|
|
|
llvm::Type *ArgElemType;
|
|
|
|
std::tie(Arg, ArgElemType) = EmitAsmInput(Info, InputExpr, Constraints);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-05-03 15:27:51 +08:00
|
|
|
// If this input argument is tied to a larger output result, extend the
|
|
|
|
// input to be the same size as the output. The LLVM backend wants to see
|
|
|
|
// the input and output of a matching constraint be the same size. Note
|
|
|
|
// that GCC does not define what the top bits are here. We use zext because
|
|
|
|
// that is usually cheaper, but LLVM IR should really get an anyext someday.
|
|
|
|
if (Info.hasTiedOperand()) {
|
|
|
|
unsigned Output = Info.getTiedOperand();
|
2010-04-24 01:27:29 +08:00
|
|
|
QualType OutputType = S.getOutputExpr(Output)->getType();
|
2009-05-03 15:27:51 +08:00
|
|
|
QualType InputTy = InputExpr->getType();
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2010-04-24 01:27:29 +08:00
|
|
|
if (getContext().getTypeSize(OutputType) >
|
2009-05-03 15:27:51 +08:00
|
|
|
getContext().getTypeSize(InputTy)) {
|
|
|
|
// Use ptrtoint as appropriate so that we can do our extension.
|
|
|
|
if (isa<llvm::PointerType>(Arg->getType()))
|
2010-06-27 15:15:29 +08:00
|
|
|
Arg = Builder.CreatePtrToInt(Arg, IntPtrTy);
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *OutputTy = ConvertType(OutputType);
|
2010-04-24 01:27:29 +08:00
|
|
|
if (isa<llvm::IntegerType>(OutputTy))
|
|
|
|
Arg = Builder.CreateZExt(Arg, OutputTy);
|
2011-07-29 08:24:50 +08:00
|
|
|
else if (isa<llvm::PointerType>(OutputTy))
|
|
|
|
Arg = Builder.CreateZExt(Arg, IntPtrTy);
|
2022-03-03 03:18:54 +08:00
|
|
|
else if (OutputTy->isFloatingPointTy())
|
2010-04-24 01:27:29 +08:00
|
|
|
Arg = Builder.CreateFPExt(Arg, OutputTy);
|
2009-05-03 15:27:51 +08:00
|
|
|
}
|
2019-03-15 03:46:51 +08:00
|
|
|
// Deal with the tied operands' constraint code in adjustInlineAsmType.
|
|
|
|
ReplaceConstraint = OutputConstraints[Output];
|
2009-05-03 15:27:51 +08:00
|
|
|
}
|
2012-03-23 07:25:07 +08:00
|
|
|
if (llvm::Type* AdjTy =
|
2019-03-15 03:46:51 +08:00
|
|
|
getTargetHooks().adjustInlineAsmType(*this, ReplaceConstraint,
|
2011-02-20 07:03:58 +08:00
|
|
|
Arg->getType()))
|
2010-10-30 07:12:32 +08:00
|
|
|
Arg = Builder.CreateBitCast(Arg, AdjTy);
|
2013-06-07 08:04:50 +08:00
|
|
|
else
|
|
|
|
CGM.getDiags().Report(S.getAsmLoc(), diag::err_asm_invalid_type_in_input)
|
|
|
|
<< InputExpr->getType() << InputConstraint;
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2018-08-15 04:21:05 +08:00
|
|
|
// Update largest vector width for any vector types.
|
|
|
|
if (auto *VT = dyn_cast<llvm::VectorType>(Arg->getType()))
|
2020-03-17 18:27:29 +08:00
|
|
|
LargestVectorWidth =
|
|
|
|
std::max((uint64_t)LargestVectorWidth,
|
|
|
|
VT->getPrimitiveSizeInBits().getKnownMinSize());
|
2018-08-15 04:21:05 +08:00
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
ArgTypes.push_back(Arg->getType());
|
2022-01-05 22:16:24 +08:00
|
|
|
ArgElemTypes.push_back(ArgElemType);
|
2008-02-06 00:35:33 +08:00
|
|
|
Args.push_back(Arg);
|
|
|
|
Constraints += InputConstraint;
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2022-01-12 03:51:22 +08:00
|
|
|
// Append the "input" part of inout constraints.
|
|
|
|
for (unsigned i = 0, e = InOutArgs.size(); i != e; i++) {
|
|
|
|
ArgTypes.push_back(InOutArgTypes[i]);
|
|
|
|
ArgElemTypes.push_back(InOutArgElemTypes[i]);
|
|
|
|
Args.push_back(InOutArgs[i]);
|
|
|
|
}
|
|
|
|
Constraints += InOutConstraints;
|
|
|
|
|
2019-06-03 23:57:25 +08:00
|
|
|
// Labels
|
|
|
|
SmallVector<llvm::BasicBlock *, 16> Transfer;
|
|
|
|
llvm::BasicBlock *Fallthrough = nullptr;
|
|
|
|
bool IsGCCAsmGoto = false;
|
|
|
|
if (const auto *GS = dyn_cast<GCCAsmStmt>(&S)) {
|
|
|
|
IsGCCAsmGoto = GS->isAsmGoto();
|
|
|
|
if (IsGCCAsmGoto) {
|
Support output constraints on "asm goto"
Summary:
Clang's "asm goto" feature didn't initially support outputs constraints. That
was the same behavior as gcc's implementation. The decision by gcc not to
support outputs was based on a restriction in their IR regarding terminators.
LLVM doesn't restrict terminators from returning values (e.g. 'invoke'), so
it made sense to support this feature.
Output values are valid only on the 'fallthrough' path. If an output value's used
on an indirect branch, then it's 'poisoned'.
In theory, outputs *could* be valid on the 'indirect' paths, but it's very
difficult to guarantee that the original semantics would be retained. E.g.
because indirect labels could be used as data, we wouldn't be able to split
critical edges in situations where two 'callbr' instructions have the same
indirect label, because the indirect branch's destination would no longer be
the same.
Reviewers: jyknight, nickdesaulniers, hfinkel
Reviewed By: jyknight, nickdesaulniers
Subscribers: MaskRay, rsmith, hiraditya, llvm-commits, cfe-commits, craig.topper, rnk
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69876
2020-02-25 10:32:50 +08:00
|
|
|
for (const auto *E : GS->labels()) {
|
2019-06-03 23:57:25 +08:00
|
|
|
JumpDest Dest = getJumpDestForLabel(E->getLabel());
|
|
|
|
Transfer.push_back(Dest.getBlock());
|
|
|
|
llvm::BlockAddress *BA =
|
|
|
|
llvm::BlockAddress::get(CurFn, Dest.getBlock());
|
|
|
|
Args.push_back(BA);
|
|
|
|
ArgTypes.push_back(BA->getType());
|
2022-01-05 22:16:24 +08:00
|
|
|
ArgElemTypes.push_back(nullptr);
|
2019-06-03 23:57:25 +08:00
|
|
|
if (!Constraints.empty())
|
|
|
|
Constraints += ',';
|
2022-01-12 03:32:35 +08:00
|
|
|
Constraints += 'i';
|
2019-06-03 23:57:25 +08:00
|
|
|
}
|
Support output constraints on "asm goto"
Summary:
Clang's "asm goto" feature didn't initially support outputs constraints. That
was the same behavior as gcc's implementation. The decision by gcc not to
support outputs was based on a restriction in their IR regarding terminators.
LLVM doesn't restrict terminators from returning values (e.g. 'invoke'), so
it made sense to support this feature.
Output values are valid only on the 'fallthrough' path. If an output value's used
on an indirect branch, then it's 'poisoned'.
In theory, outputs *could* be valid on the 'indirect' paths, but it's very
difficult to guarantee that the original semantics would be retained. E.g.
because indirect labels could be used as data, we wouldn't be able to split
critical edges in situations where two 'callbr' instructions have the same
indirect label, because the indirect branch's destination would no longer be
the same.
Reviewers: jyknight, nickdesaulniers, hfinkel
Reviewed By: jyknight, nickdesaulniers
Subscribers: MaskRay, rsmith, hiraditya, llvm-commits, cfe-commits, craig.topper, rnk
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69876
2020-02-25 10:32:50 +08:00
|
|
|
Fallthrough = createBasicBlock("asm.fallthrough");
|
2019-06-03 23:57:25 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-05-14 02:05:11 +08:00
|
|
|
bool HasUnwindClobber = false;
|
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
// Clobbers
|
|
|
|
for (unsigned i = 0, e = S.getNumClobbers(); i != e; i++) {
|
2012-08-28 07:47:56 +08:00
|
|
|
StringRef Clobber = S.getClobber(i);
|
2008-02-06 00:35:33 +08:00
|
|
|
|
2015-07-11 02:44:40 +08:00
|
|
|
if (Clobber == "memory")
|
|
|
|
ReadOnly = ReadNone = false;
|
2021-05-14 02:05:11 +08:00
|
|
|
else if (Clobber == "unwind") {
|
|
|
|
HasUnwindClobber = true;
|
|
|
|
continue;
|
|
|
|
} else if (Clobber != "cc") {
|
2014-09-05 04:04:38 +08:00
|
|
|
Clobber = getTarget().getNormalizedGCCRegisterName(Clobber);
|
2019-09-09 22:59:34 +08:00
|
|
|
if (CGM.getCodeGenOpts().StackClashProtector &&
|
|
|
|
getTarget().isSPRegName(Clobber)) {
|
|
|
|
CGM.getDiags().Report(S.getAsmLoc(),
|
|
|
|
diag::warn_stack_clash_protection_inline_asm);
|
|
|
|
}
|
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2021-01-27 21:10:33 +08:00
|
|
|
if (isa<MSAsmStmt>(&S)) {
|
|
|
|
if (Clobber == "eax" || Clobber == "edx") {
|
|
|
|
if (Constraints.find("=&A") != std::string::npos)
|
|
|
|
continue;
|
|
|
|
std::string::size_type position1 =
|
|
|
|
Constraints.find("={" + Clobber.str() + "}");
|
|
|
|
if (position1 != std::string::npos) {
|
|
|
|
Constraints.insert(position1 + 1, "&");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
std::string::size_type position2 = Constraints.find("=A");
|
|
|
|
if (position2 != std::string::npos) {
|
|
|
|
Constraints.insert(position2 + 1, "&");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-09-05 04:04:38 +08:00
|
|
|
if (!Constraints.empty())
|
2008-02-06 00:35:33 +08:00
|
|
|
Constraints += ',';
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2008-02-06 08:11:32 +08:00
|
|
|
Constraints += "~{";
|
2008-02-06 00:35:33 +08:00
|
|
|
Constraints += Clobber;
|
2008-02-06 08:11:32 +08:00
|
|
|
Constraints += '}';
|
2008-02-06 00:35:33 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2021-05-14 02:05:11 +08:00
|
|
|
assert(!(HasUnwindClobber && IsGCCAsmGoto) &&
|
|
|
|
"unwind clobber can't be used with asm goto");
|
|
|
|
|
2008-02-06 00:35:33 +08:00
|
|
|
// Add machine specific clobbers
|
2013-04-17 06:48:15 +08:00
|
|
|
std::string MachineClobbers = getTarget().getClobbers();
|
2008-12-21 09:15:32 +08:00
|
|
|
if (!MachineClobbers.empty()) {
|
2008-02-06 00:35:33 +08:00
|
|
|
if (!Constraints.empty())
|
|
|
|
Constraints += ',';
|
2008-12-21 09:15:32 +08:00
|
|
|
Constraints += MachineClobbers;
|
2008-02-06 00:35:33 +08:00
|
|
|
}
|
2009-05-01 08:16:04 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::Type *ResultType;
|
2009-05-03 16:21:20 +08:00
|
|
|
if (ResultRegTypes.empty())
|
2012-02-07 08:39:47 +08:00
|
|
|
ResultType = VoidTy;
|
2009-05-03 16:21:20 +08:00
|
|
|
else if (ResultRegTypes.size() == 1)
|
|
|
|
ResultType = ResultRegTypes[0];
|
2009-05-01 08:16:04 +08:00
|
|
|
else
|
2011-02-08 16:22:06 +08:00
|
|
|
ResultType = llvm::StructType::get(getLLVMContext(), ResultRegTypes);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2011-07-18 12:24:23 +08:00
|
|
|
llvm::FunctionType *FTy =
|
2008-02-06 00:35:33 +08:00
|
|
|
llvm::FunctionType::get(ResultType, ArgTypes, false);
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2012-09-05 03:50:17 +08:00
|
|
|
bool HasSideEffect = S.isVolatile() || S.getNumOutputs() == 0;
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens to late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
|
|
|
|
llvm::InlineAsm::AsmDialect GnuAsmDialect =
|
|
|
|
CGM.getCodeGenOpts().getInlineAsmDialect() == CodeGenOptions::IAD_ATT
|
|
|
|
? llvm::InlineAsm::AD_ATT
|
|
|
|
: llvm::InlineAsm::AD_Intel;
|
2012-09-06 03:01:07 +08:00
|
|
|
llvm::InlineAsm::AsmDialect AsmDialect = isa<MSAsmStmt>(&S) ?
|
[clang] Make -masm=intel affect inline asm style
With this,
void f() { __asm__("mov eax, ebx"); }
now compiles with clang with -masm=intel.
This matches gcc.
The flag is not accepted in clang-cl mode. It has no effect on
MSVC-style `__asm {}` blocks, which are unconditionally in intel
mode both before and after this change.
One difference to gcc is that in clang, inline asm strings are
"local" while they're "global" in gcc. Building the following with
-masm=intel works with clang, but not with gcc where the ".att_syntax"
from the 2nd __asm__() is in effect until file end (or until a
".intel_syntax" somewhere later in the file):
__asm__("mov eax, ebx");
__asm__(".att_syntax\nmovl %ebx, %eax");
__asm__("mov eax, ebx");
This also updates clang's intrinsic headers to work both in
-masm=att (the default) and -masm=intel modes.
The official solution for this according to "Multiple assembler dialects in asm
templates" in gcc docs->Extensions->Inline Assembly->Extended Asm
is to write every inline asm snippet twice:
bt{l %[Offset],%[Base] | %[Base],%[Offset]}
This works in LLVM after D113932 and D113894, so use that.
(Just putting `.att_syntax` at the start of the snippet works in some but not
all cases: When LLVM interpolates in parameters like `%0`, it uses at&t or
intel syntax according to the inline asm snippet's flavor, so the `.att_syntax`
within the snippet happens too late: The interpolated-in parameter is already
in intel style, and then won't parse in the switched `.att_syntax`.)
It might be nice to invent a `#pragma clang asm_dialect push "att"` /
`#pragma clang asm_dialect pop` to be able to force asm style per snippet,
so that the inline asm string doesn't contain the same code in two variants,
but let's leave that for a follow-up.
Fixes PR21401 and PR20241.
Differential Revision: https://reviews.llvm.org/D113707
2021-11-12 03:20:02 +08:00
|
|
|
llvm::InlineAsm::AD_Intel : GnuAsmDialect;
|
|
|
|
|
2021-05-14 02:05:11 +08:00
|
|
|
llvm::InlineAsm *IA = llvm::InlineAsm::get(
|
|
|
|
FTy, AsmString, Constraints, HasSideEffect,
|
|
|
|
/* IsAlignStack */ false, AsmDialect, HasUnwindClobber);
|
2009-05-03 16:21:20 +08:00
|
|
|
std::vector<llvm::Value*> RegResults;
|
2019-06-03 23:57:25 +08:00
|
|
|
if (IsGCCAsmGoto) {
|
|
|
|
llvm::CallBrInst *Result =
|
|
|
|
Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
|
Support output constraints on "asm goto"
Summary:
Clang's "asm goto" feature didn't initially support outputs constraints. That
was the same behavior as gcc's implementation. The decision by gcc not to
support outputs was based on a restriction in their IR regarding terminators.
LLVM doesn't restrict terminators from returning values (e.g. 'invoke'), so
it made sense to support this feature.
Output values are valid only on the 'fallthrough' path. If an output value's used
on an indirect branch, then it's 'poisoned'.
In theory, outputs *could* be valid on the 'indirect' paths, but it's very
difficult to guarantee that the original semantics would be retained. E.g.
because indirect labels could be used as data, we wouldn't be able to split
critical edges in situations where two 'callbr' instructions have the same
indirect label, because the indirect branch's destination would no longer be
the same.
Reviewers: jyknight, nickdesaulniers, hfinkel
Reviewed By: jyknight, nickdesaulniers
Subscribers: MaskRay, rsmith, hiraditya, llvm-commits, cfe-commits, craig.topper, rnk
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D69876
2020-02-25 10:32:50 +08:00
|
|
|
EmitBlock(Fallthrough);
|
2021-05-14 02:05:11 +08:00
|
|
|
UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, false,
|
|
|
|
ReadOnly, ReadNone, InNoMergeAttributedStmt, S,
|
2022-01-05 22:16:24 +08:00
|
|
|
ResultRegTypes, ArgElemTypes, *this, RegResults);
|
2021-05-14 02:05:11 +08:00
|
|
|
} else if (HasUnwindClobber) {
|
|
|
|
llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, "");
|
|
|
|
UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone,
|
2022-01-05 22:16:24 +08:00
|
|
|
InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
|
|
|
|
*this, RegResults);
|
2009-05-01 08:16:04 +08:00
|
|
|
} else {
|
2019-06-03 23:57:25 +08:00
|
|
|
llvm::CallInst *Result =
|
|
|
|
Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
|
2021-05-14 02:05:11 +08:00
|
|
|
UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, false,
|
|
|
|
ReadOnly, ReadNone, InNoMergeAttributedStmt, S,
|
2022-01-05 22:16:24 +08:00
|
|
|
ResultRegTypes, ArgElemTypes, *this, RegResults);
|
2009-05-03 16:21:20 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2014-09-05 04:04:38 +08:00
|
|
|
assert(RegResults.size() == ResultRegTypes.size());
|
|
|
|
assert(RegResults.size() == ResultTruncRegTypes.size());
|
|
|
|
assert(RegResults.size() == ResultRegDests.size());
|
2019-08-29 19:21:41 +08:00
|
|
|
// ResultRegDests can be also populated by addReturnRegisterOutputs() above,
|
|
|
|
// in which case its size may grow.
|
|
|
|
assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
|
2009-05-03 16:21:20 +08:00
|
|
|
for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
|
|
|
|
llvm::Value *Tmp = RegResults[i];
|
2021-07-28 23:40:59 +08:00
|
|
|
llvm::Type *TruncTy = ResultTruncRegTypes[i];
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2009-05-03 16:21:20 +08:00
|
|
|
// If the result type of the LLVM IR asm doesn't match the result type of
|
|
|
|
// the expression, do the conversion.
|
|
|
|
if (ResultRegTypes[i] != ResultTruncRegTypes[i]) {
|
2012-06-21 01:43:05 +08:00
|
|
|
|
2010-04-24 01:27:29 +08:00
|
|
|
// Truncate the integer result to the right size, note that TruncTy can be
|
|
|
|
// a pointer.
|
|
|
|
if (TruncTy->isFloatingPointTy())
|
|
|
|
Tmp = Builder.CreateFPTrunc(Tmp, TruncTy);
|
2010-04-24 12:55:02 +08:00
|
|
|
else if (TruncTy->isPointerTy() && Tmp->getType()->isIntegerTy()) {
|
2012-10-09 00:25:52 +08:00
|
|
|
uint64_t ResSize = CGM.getDataLayout().getTypeSizeInBits(TruncTy);
|
2011-02-08 16:22:06 +08:00
|
|
|
Tmp = Builder.CreateTrunc(Tmp,
|
|
|
|
llvm::IntegerType::get(getLLVMContext(), (unsigned)ResSize));
|
2009-05-03 16:21:20 +08:00
|
|
|
Tmp = Builder.CreateIntToPtr(Tmp, TruncTy);
|
2010-04-24 12:55:02 +08:00
|
|
|
} else if (Tmp->getType()->isPointerTy() && TruncTy->isIntegerTy()) {
|
2012-10-09 00:25:52 +08:00
|
|
|
uint64_t TmpSize =CGM.getDataLayout().getTypeSizeInBits(Tmp->getType());
|
2011-02-08 16:22:06 +08:00
|
|
|
Tmp = Builder.CreatePtrToInt(Tmp,
|
|
|
|
llvm::IntegerType::get(getLLVMContext(), (unsigned)TmpSize));
|
2010-04-24 12:55:02 +08:00
|
|
|
Tmp = Builder.CreateTrunc(Tmp, TruncTy);
|
|
|
|
} else if (TruncTy->isIntegerTy()) {
|
2017-09-12 19:05:42 +08:00
|
|
|
Tmp = Builder.CreateZExtOrTrunc(Tmp, TruncTy);
|
2010-10-30 07:12:32 +08:00
|
|
|
} else if (TruncTy->isVectorTy()) {
|
|
|
|
Tmp = Builder.CreateBitCast(Tmp, TruncTy);
|
2009-05-03 16:21:20 +08:00
|
|
|
}
|
2009-05-01 08:16:04 +08:00
|
|
|
}
|
2009-09-09 23:08:12 +08:00
|
|
|
|
2019-08-29 19:21:41 +08:00
|
|
|
LValue Dest = ResultRegDests[i];
|
|
|
|
// ResultTypeRequiresCast elements correspond to the first
|
|
|
|
// ResultTypeRequiresCast.size() elements of RegResults.
|
|
|
|
if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
|
|
|
|
unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]);
|
2022-02-14 21:18:04 +08:00
|
|
|
Address A = Builder.CreateElementBitCast(Dest.getAddress(*this),
|
|
|
|
ResultRegTypes[i]);
|
2021-07-28 23:40:59 +08:00
|
|
|
if (getTargetHooks().isScalarizableAsmOperand(*this, TruncTy)) {
|
|
|
|
Builder.CreateStore(Tmp, A);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-08-29 19:21:41 +08:00
|
|
|
QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false);
|
|
|
|
if (Ty.isNull()) {
|
|
|
|
const Expr *OutExpr = S.getOutputExpr(i);
|
2022-03-23 11:24:54 +08:00
|
|
|
CGM.getDiags().Report(OutExpr->getExprLoc(),
|
|
|
|
diag::err_store_value_to_reg);
|
2019-08-29 19:21:41 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
Dest = MakeAddrLValue(A, Ty);
|
|
|
|
}
|
|
|
|
EmitStoreThroughLValue(RValue::get(Tmp), Dest);
|
2009-05-01 08:16:04 +08:00
|
|
|
}
|
2008-02-06 00:35:33 +08:00
|
|
|
}
|
2013-04-17 02:53:08 +08:00
|
|
|
|
2014-10-29 20:21:55 +08:00
|
|
|
/// Emit and fill in the record that holds every value captured by \p S.
/// The returned LValue designates a function-local temporary
/// ("agg.captured") whose address is later passed to the outlined helper.
LValue CodeGenFunction::InitCapturedStruct(const CapturedStmt &S) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  QualType RecordTy = getContext().getRecordType(RD);

  // Initialize the captured struct.
  LValue SlotLV =
    MakeAddrLValue(CreateMemTemp(RecordTy, "agg.captured"), RecordTy);

  // Walk the capture initializers in lockstep with the record's fields:
  // the i-th field of the capture record corresponds to the i-th capture
  // initializer of the CapturedStmt.
  RecordDecl::field_iterator CurField = RD->field_begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField) {
    LValue LV = EmitLValueForFieldInitialization(SlotLV, *CurField);
    if (CurField->hasCapturedVLAType()) {
      // Field captures a variably-modified type: store its size value
      // (same mechanism lambdas use for VLA captures).
      EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV);
    } else {
      // Ordinary capture: emit the corresponding initializer expression
      // into the field.
      EmitInitializerForField(*CurField, LV, *I);
    }
  }

  return SlotLV;
}
|
|
|
|
|
|
|
|
/// Generate an outlined function for the body of a CapturedStmt, store any
/// captured variables into the captured struct, and call the outlined function.
llvm::Function *
CodeGenFunction::EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K) {
  // Build the aggregate of captured values in the *current* function.
  LValue CapStruct = InitCapturedStruct(S);

  // Emit the CapturedDecl
  // Use a fresh CodeGenFunction so the helper's emission state (blocks,
  // locals, cleanups) is kept separate from the enclosing function's.
  CodeGenFunction CGF(CGM, true);
  CGCapturedStmtRAII CapInfoRAII(CGF, new CGCapturedStmtInfo(S, K));
  llvm::Function *F = CGF.GenerateCapturedStmtFunction(S);
  // Free the heap-allocated CGCapturedStmtInfo installed on CGF above.
  delete CGF.CapturedStmtInfo;

  // Emit call to the helper function.
  EmitCallOrInvoke(F, CapStruct.getPointer(*this));

  return F;
}
|
|
|
|
|
Compute and preserve alignment more faithfully in IR-generation.
Introduce an Address type to bundle a pointer value with an
alignment. Introduce APIs on CGBuilderTy to work with Address
values. Change core APIs on CGF/CGM to traffic in Address where
appropriate. Require alignments to be non-zero. Update a ton
of code to compute and propagate alignment information.
As part of this, I've promoted CGBuiltin's EmitPointerWithAlignment
helper function to CGF and made use of it in a number of places in
the expression emitter.
The end result is that we should now be significantly more correct
when performing operations on objects that are locally known to
be under-aligned. Since alignment is not reliably tracked in the
type system, there are inherent limits to this, but at least we
are no longer confused by standard operations like derived-to-base
conversions and array-to-pointer decay. I've also fixed a large
number of bugs where we were applying the complete-object alignment
to a pointer instead of the non-virtual alignment, although most of
these were hidden by the very conservative approach we took with
member alignment.
Also, because IRGen now reliably asserts on zero alignments, we
should no longer be subject to an absurd but frustrating recurring
bug where an incomplete type would report a zero alignment and then
we'd naively do a alignmentAtOffset on it and emit code using an
alignment equal to the largest power-of-two factor of the offset.
We should also now be emitting much more aggressive alignment
attributes in the presence of over-alignment. In particular,
field access now uses alignmentAtOffset instead of min.
Several times in this patch, I had to change the existing
code-generation pattern in order to more effectively use
the Address APIs. For the most part, this seems to be a strict
improvement, like doing pointer arithmetic with GEPs instead of
ptrtoint. That said, I've tried very hard to not change semantics,
but it is likely that I've failed in a few places, for which I
apologize.
ABIArgInfo now always carries the assumed alignment of indirect and
indirect byval arguments. In order to cut down on what was already
a dauntingly large patch, I changed the code to never set align
attributes in the IR on non-byval indirect arguments. That is,
we still generate code which assumes that indirect arguments have
the given alignment, but we don't express this information to the
backend except where it's semantically required (i.e. on byvals).
This is likely a minor regression for those targets that did provide
this information, but it'll be trivial to add it back in a later
patch.
I partially punted on applying this work to CGBuiltin. Please
do not add more uses of the CreateDefaultAligned{Load,Store}
APIs; they will be going away eventually.
llvm-svn: 246985
2015-09-08 16:05:57 +08:00
|
|
|
/// Materialize the capture record for \p S and return its address — the
/// value passed as the context argument of an outlined captured-statement
/// helper function.
Address CodeGenFunction::GenerateCapturedStmtArgument(const CapturedStmt &S) {
  return InitCapturedStruct(S).getAddress(*this);
}
|
|
|
|
|
2013-05-10 03:17:11 +08:00
|
|
|
/// Creates the outlined function for a CapturedStmt.
llvm::Function *
CodeGenFunction::GenerateCapturedStmtFunction(const CapturedStmt &S) {
  assert(CapturedStmtInfo &&
    "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  const RecordDecl *RD = S.getCapturedRecordDecl();
  SourceLocation Loc = S.getBeginLoc();
  assert(CD->hasBody() && "missing CapturedDecl body");

  // Build the argument list.
  // The helper's parameters are exactly the CapturedDecl's parameters
  // (which include the context-struct pointer parameter).
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList Args;
  Args.append(CD->param_begin(), CD->param_end());

  // Create the function declaration.
  // The helper returns void; results flow through the capture record.
  const CGFunctionInfo &FuncInfo =
    CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  // Internal linkage: the helper is only referenced from this module.
  llvm::Function *F =
    llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                           CapturedStmtInfo->getHelperName(), &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->addFnAttr(llvm::Attribute::NoUnwind);

  // Generate the function.
  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
                CD->getBody()->getBeginLoc());
  // Set the context parameter in CapturedStmtInfo.
  Address DeclPtr = GetAddrOfLocalVar(CD->getContextParam());
  CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr));

  // Initialize variable-length arrays.
  // For each captured VLA size, load the saved size value out of the
  // context record and seed VLASizeMap so later references to the VLA's
  // size expression resolve inside the helper.
  LValue Base = MakeNaturalAlignAddrLValue(CapturedStmtInfo->getContextValue(),
                                           Ctx.getTagDeclType(RD));
  for (auto *FD : RD->fields()) {
    if (FD->hasCapturedVLAType()) {
      auto *ExprArg =
          EmitLoadOfLValue(EmitLValueForField(Base, FD), S.getBeginLoc())
              .getScalarVal();
      auto VAT = FD->getCapturedVLAType();
      VLASizeMap[VAT->getSizeExpr()] = ExprArg;
    }
  }

  // If 'this' is captured, load it into CXXThisValue.
  if (CapturedStmtInfo->isCXXThisExprCaptured()) {
    FieldDecl *FD = CapturedStmtInfo->getThisFieldDecl();
    LValue ThisLValue = EmitLValueForField(Base, FD);
    CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal();
  }

  // Attach profiling counters, emit the body, and close out the function.
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  FinishFunction(CD->getBodyRBrace());

  return F;
}
|