forked from OSchip/llvm-project
Expand MUX instructions early on Hexagon
This time with all files included. llvm-svn: 233696
This commit is contained in:
parent
8c4fd2bdeb
commit
c05dff1792
|
@ -15,6 +15,7 @@ add_llvm_target(HexagonCodeGen
|
|||
HexagonAsmPrinter.cpp
|
||||
HexagonCFGOptimizer.cpp
|
||||
HexagonCopyToCombine.cpp
|
||||
HexagonExpandCondsets.cpp
|
||||
HexagonExpandPredSpillCode.cpp
|
||||
HexagonFixupHwLoops.cpp
|
||||
HexagonFrameLowering.cpp
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -845,8 +845,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
|
|||
return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
|
||||
}
|
||||
|
||||
int HexagonInstrInfo::
|
||||
getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
|
||||
int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
|
||||
enum Hexagon::PredSense inPredSense;
|
||||
inPredSense = invertPredicate ? Hexagon::PredSense_false :
|
||||
Hexagon::PredSense_true;
|
||||
|
@ -884,7 +883,7 @@ PredicateInstruction(MachineInstr *MI,
|
|||
// This will change MI's opcode to its predicate version.
|
||||
// However, its operand list is still the old one, i.e. the
|
||||
// non-predicate one.
|
||||
MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
|
||||
MI->setDesc(get(getCondOpcode(Opc, invertJump)));
|
||||
|
||||
int oper = -1;
|
||||
unsigned int GAIdx = 0;
|
||||
|
|
|
@ -216,9 +216,7 @@ public:
|
|||
short getNonExtOpcode(const MachineInstr *MI) const;
|
||||
bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
|
||||
bool PredOpcodeHasNot(Opcode_t Opcode) const;
|
||||
|
||||
private:
|
||||
int getMatchingCondBranchOpcode(int Opc, bool sense) const;
|
||||
int getCondOpcode(int Opc, bool sense) const;
|
||||
|
||||
};
|
||||
|
||||
|
|
|
@ -27,11 +27,15 @@
|
|||
using namespace llvm;
|
||||
|
||||
static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
|
||||
cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
|
||||
cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
|
||||
|
||||
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
|
||||
cl::Hidden, cl::ZeroOrMore, cl::init(false),
|
||||
cl::desc("Disable Hexagon CFG Optimization"));
|
||||
cl::Hidden, cl::ZeroOrMore, cl::init(false),
|
||||
cl::desc("Disable Hexagon CFG Optimization"));
|
||||
|
||||
static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
|
||||
cl::init(true), cl::Hidden, cl::ZeroOrMore,
|
||||
cl::desc("Early expansion of MUX"));
|
||||
|
||||
|
||||
/// HexagonTargetMachineModule - Note that this is used on hosts that
|
||||
|
@ -55,6 +59,10 @@ static MachineSchedRegistry
|
|||
SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
|
||||
createVLIWMachineSched);
|
||||
|
||||
namespace llvm {
|
||||
FunctionPass *createHexagonExpandCondsets();
|
||||
}
|
||||
|
||||
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
|
||||
///
|
||||
|
||||
|
@ -79,7 +87,15 @@ namespace {
|
|||
class HexagonPassConfig : public TargetPassConfig {
|
||||
public:
|
||||
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
: TargetPassConfig(TM, PM) {
|
||||
bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None);
|
||||
if (!NoOpt) {
|
||||
if (EnableExpandCondsets) {
|
||||
Pass *Exp = createHexagonExpandCondsets();
|
||||
insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HexagonTargetMachine &getHexagonTargetMachine() const {
|
||||
return getTM<HexagonTargetMachine>();
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
|
||||
|
||||
; CHECK: r{{[0-9]+:[0-9]+}} = #0
|
||||
; CHECK: r{{[0-9]+:[0-9]+}} = #1
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
; CHECK: if{{.*}}add
|
||||
; CHECK: if{{.*}}sub
|
||||
|
||||
define i32 @foo (i1 %a, i32 %b, i32 %c, i32 %d) nounwind {
|
||||
%1 = add i32 %b, %d
|
||||
%2 = sub i32 %c, %d
|
||||
%3 = select i1 %a, i32 %1, i32 %2
|
||||
ret i32 %3
|
||||
}
|
||||
|
|
@ -0,0 +1,131 @@
|
|||
; RUN: llc -O2 < %s
|
||||
; REQUIRES: asserts
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
|
||||
target triple = "hexagon-unknown--elf"
|
||||
|
||||
%struct.cpumask = type { [1 x i32] }
|
||||
%struct.load_weight = type { i32, i32 }
|
||||
|
||||
@sysctl_sched_latency = global i32 6000000, align 4
|
||||
@normalized_sysctl_sched_latency = global i32 6000000, align 4
|
||||
@sysctl_sched_tunable_scaling = global i8 1, align 1
|
||||
@sysctl_sched_min_granularity = global i32 750000, align 4
|
||||
@normalized_sysctl_sched_min_granularity = global i32 750000, align 4
|
||||
@sysctl_sched_wakeup_granularity = global i32 1000000, align 4
|
||||
@normalized_sysctl_sched_wakeup_granularity = global i32 1000000, align 4
|
||||
@sysctl_sched_migration_cost = constant i32 500000, align 4
|
||||
@sysctl_sched_shares_window = global i32 10000000, align 4
|
||||
@sysctl_sched_child_runs_first = common global i32 0, align 4
|
||||
@cpu_online_mask = external constant %struct.cpumask*
|
||||
|
||||
; Function Attrs: noinline nounwind
|
||||
define void @sched_init_granularity() #0 {
|
||||
entry:
|
||||
tail call fastcc void @update_sysctl()
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: noinline nounwind
|
||||
define internal fastcc void @update_sysctl() #0 {
|
||||
entry:
|
||||
%call = tail call i32 @get_update_sysctl_factor()
|
||||
%0 = load i32, i32* @normalized_sysctl_sched_min_granularity, align 4, !tbaa !1
|
||||
%mul = mul i32 %0, %call
|
||||
store i32 %mul, i32* @sysctl_sched_min_granularity, align 4, !tbaa !1
|
||||
%1 = load i32, i32* @normalized_sysctl_sched_latency, align 4, !tbaa !1
|
||||
%mul1 = mul i32 %1, %call
|
||||
store i32 %mul1, i32* @sysctl_sched_latency, align 4, !tbaa !1
|
||||
%2 = load i32, i32* @normalized_sysctl_sched_wakeup_granularity, align 4, !tbaa !1
|
||||
%mul2 = mul i32 %2, %call
|
||||
store i32 %mul2, i32* @sysctl_sched_wakeup_granularity, align 4, !tbaa !1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: noinline nounwind
|
||||
define i32 @calc_delta_mine(i32 %delta_exec, i32 %weight, %struct.load_weight* nocapture %lw) #0 {
|
||||
entry:
|
||||
%cmp = icmp ugt i32 %weight, 1
|
||||
%conv = zext i32 %delta_exec to i64
|
||||
br i1 %cmp, label %if.then, label %if.end, !prof !5
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%conv2 = zext i32 %weight to i64
|
||||
%mul = mul i64 %conv2, %conv
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %entry, %if.then
|
||||
%tmp.0 = phi i64 [ %mul, %if.then ], [ %conv, %entry ]
|
||||
%inv_weight = getelementptr inbounds %struct.load_weight, %struct.load_weight* %lw, i32 0, i32 1
|
||||
%0 = load i32, i32* %inv_weight, align 4, !tbaa !6
|
||||
%tobool4 = icmp eq i32 %0, 0
|
||||
br i1 %tobool4, label %if.then5, label %if.end22
|
||||
|
||||
if.then5: ; preds = %if.end
|
||||
%weight7 = getelementptr inbounds %struct.load_weight, %struct.load_weight* %lw, i32 0, i32 0
|
||||
%1 = load i32, i32* %weight7, align 4, !tbaa !9
|
||||
%lnot9 = icmp eq i32 %1, 0
|
||||
br i1 %lnot9, label %if.then17, label %if.else19, !prof !10
|
||||
|
||||
if.then17: ; preds = %if.then5
|
||||
store i32 -1, i32* %inv_weight, align 4, !tbaa !6
|
||||
br label %if.end22
|
||||
|
||||
if.else19: ; preds = %if.then5
|
||||
%div = udiv i32 -1, %1
|
||||
store i32 %div, i32* %inv_weight, align 4, !tbaa !6
|
||||
br label %if.end22
|
||||
|
||||
if.end22: ; preds = %if.end, %if.then17, %if.else19
|
||||
%2 = phi i32 [ %0, %if.end ], [ -1, %if.then17 ], [ %div, %if.else19 ]
|
||||
%cmp23 = icmp ugt i64 %tmp.0, 4294967295
|
||||
br i1 %cmp23, label %if.then31, label %if.else37, !prof !10
|
||||
|
||||
if.then31: ; preds = %if.end22
|
||||
%add = add i64 %tmp.0, 32768
|
||||
%shr = lshr i64 %add, 16
|
||||
%conv33 = zext i32 %2 to i64
|
||||
%mul34 = mul i64 %conv33, %shr
|
||||
%add35 = add i64 %mul34, 32768
|
||||
%shr36 = lshr i64 %add35, 16
|
||||
br label %if.end43
|
||||
|
||||
if.else37: ; preds = %if.end22
|
||||
%conv39 = zext i32 %2 to i64
|
||||
%mul40 = mul i64 %conv39, %tmp.0
|
||||
%add41 = add i64 %mul40, 2147483648
|
||||
%shr42 = lshr i64 %add41, 32
|
||||
br label %if.end43
|
||||
|
||||
if.end43: ; preds = %if.else37, %if.then31
|
||||
%tmp.1 = phi i64 [ %shr36, %if.then31 ], [ %shr42, %if.else37 ]
|
||||
%cmp49 = icmp ult i64 %tmp.1, 2147483647
|
||||
%3 = trunc i64 %tmp.1 to i32
|
||||
%conv51 = select i1 %cmp49, i32 %3, i32 2147483647
|
||||
ret i32 %conv51
|
||||
}
|
||||
|
||||
declare i32 @get_update_sysctl_factor() #0
|
||||
declare i32 @__bitmap_weight(i32*, i32) #1
|
||||
|
||||
attributes #0 = { noinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #2 = { nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
|
||||
!0 = !{!"Clang 3.1"}
|
||||
!1 = !{!2, !2, i64 0}
|
||||
!2 = !{!"int", !3, i64 0}
|
||||
!3 = !{!"omnipotent char", !4, i64 0}
|
||||
!4 = !{!"Simple C/C++ TBAA"}
|
||||
!5 = !{!"branch_weights", i32 64, i32 4}
|
||||
!6 = !{!7, !8, i64 4}
|
||||
!7 = !{!"load_weight", !8, i64 0, !8, i64 4}
|
||||
!8 = !{!"long", !3, i64 0}
|
||||
!9 = !{!7, !8, i64 0}
|
||||
!10 = !{!"branch_weights", i32 4, i32 64}
|
||||
!11 = !{!12, !12, i64 0}
|
||||
!12 = !{!"any pointer", !3, i64 0}
|
||||
!13 = !{!3, !3, i64 0}
|
||||
!14 = !{i32 45854, i32 45878}
|
|
@ -0,0 +1,28 @@
|
|||
; RUN: llc -O2 < %s
|
||||
; REQUIRES: asserts
|
||||
|
||||
target datalayout = "e-m:e-p:32:32-i64:64-a:0-v32:32-n16:32"
|
||||
target triple = "hexagon"
|
||||
|
||||
; Function Attrs: nounwind optsize ssp
|
||||
define internal fastcc void @foo() nounwind {
|
||||
if.else473:
|
||||
%0 = load i64, i64* undef, align 8
|
||||
%sub = sub nsw i64 undef, %0
|
||||
%conv476 = sitofp i64 %sub to double
|
||||
%mul477 = fmul double %conv476, 0x3F50624DE0000000
|
||||
br i1 undef, label %cond.true540, label %cond.end548
|
||||
|
||||
cond.true540:
|
||||
%1 = fptrunc double %mul477 to float
|
||||
%2 = fptosi float %1 to i32
|
||||
br label %cond.end548
|
||||
|
||||
cond.end548:
|
||||
%cond549 = phi i32 [ %2, %cond.true540 ], [ undef, %if.else473 ]
|
||||
call void @bar(i32 %cond549) nounwind
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare void @bar(i32) nounwind
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s
|
||||
|
||||
; CHECK: r{{[0-9]+:[0-9]+}} = #0
|
||||
; CHECK: r{{[0-9]+:[0-9]+}} = #1
|
||||
|
|
|
@ -7,4 +7,4 @@ define i32 @foo (i1 %a, i32 %b, i32 %c)
|
|||
ret i32 %1
|
||||
}
|
||||
|
||||
; CHECK: 0000 00400085 004201f4 00c09f52
|
||||
; CHECK: 0000 00400085 00600174 00608274 00c09f52
|
||||
|
|
Loading…
Reference in New Issue