forked from OSchip/llvm-project
[PerfMonitor] Fix rdtscp callsites
Summary: Update all rdtscp callsites in PerfMonitor so that they conform with the signature changes introduced in r341698.
Reviewers: grosser, bollu
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D51928
llvm-svn: 341946
This commit is contained in:
parent
e2745b5d86
commit
4beb2f964b
|
@ -71,11 +71,6 @@ private:
|
||||||
/// The value of the cycle counter at the beginning of the last scop.
|
/// The value of the cycle counter at the beginning of the last scop.
|
||||||
llvm::Value *CyclesInScopStartPtr;
|
llvm::Value *CyclesInScopStartPtr;
|
||||||
|
|
||||||
/// A memory location which serves as argument of the RDTSCP function.
|
|
||||||
///
|
|
||||||
/// The value written to this location is currently not used.
|
|
||||||
llvm::Value *RDTSCPWriteLocation;
|
|
||||||
|
|
||||||
/// A global variable, that keeps track if the performance monitor
|
/// A global variable, that keeps track if the performance monitor
|
||||||
/// initialization has already been run.
|
/// initialization has already been run.
|
||||||
llvm::Value *AlreadyInitializedPtr;
|
llvm::Value *AlreadyInitializedPtr;
|
||||||
|
@ -106,7 +101,7 @@ private:
|
||||||
/// this scop runs.
|
/// this scop runs.
|
||||||
void addScopCounter();
|
void addScopCounter();
|
||||||
|
|
||||||
/// Get a reference to the intrinsic "i64 @llvm.x86.rdtscp(i8*)".
|
/// Get a reference to the intrinsic "{ i64, i32 } @llvm.x86.rdtscp()".
|
||||||
///
|
///
|
||||||
/// The rdtscp function returns the current value of the processor's
|
/// The rdtscp function returns the current value of the processor's
|
||||||
/// time-stamp counter as well as the current CPU identifier. On modern x86
|
/// time-stamp counter as well as the current CPU identifier. On modern x86
|
||||||
|
|
|
@ -113,9 +113,6 @@ void PerfMonitor::addGlobalVariables() {
|
||||||
|
|
||||||
TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
|
TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
|
||||||
&CyclesInScopStartPtr);
|
&CyclesInScopStartPtr);
|
||||||
|
|
||||||
TryRegisterGlobal(M, "__polly_perf_write_loation", Builder.getInt32(0),
|
|
||||||
&RDTSCPWriteLocation);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *InitFunctionName = "__polly_perf_init";
|
static const char *InitFunctionName = "__polly_perf_init";
|
||||||
|
@ -142,9 +139,9 @@ Function *PerfMonitor::insertFinalReporting() {
|
||||||
|
|
||||||
// Measure current cycles and compute final timings.
|
// Measure current cycles and compute final timings.
|
||||||
Function *RDTSCPFn = getRDTSCP();
|
Function *RDTSCPFn = getRDTSCP();
|
||||||
Value *CurrentCycles = Builder.CreateCall(
|
|
||||||
RDTSCPFn,
|
Value *CurrentCycles =
|
||||||
Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
|
Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
|
||||||
Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true);
|
Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true);
|
||||||
Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
|
Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
|
||||||
Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
|
Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
|
||||||
|
@ -255,9 +252,8 @@ Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
|
||||||
if (Supported) {
|
if (Supported) {
|
||||||
// Read the current cycle counter and store the result for later.
|
// Read the current cycle counter and store the result for later.
|
||||||
Function *RDTSCPFn = getRDTSCP();
|
Function *RDTSCPFn = getRDTSCP();
|
||||||
Value *CurrentCycles = Builder.CreateCall(
|
Value *CurrentCycles =
|
||||||
RDTSCPFn,
|
Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
|
||||||
Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
|
|
||||||
Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
|
Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
|
||||||
}
|
}
|
||||||
Builder.CreateRetVoid();
|
Builder.CreateRetVoid();
|
||||||
|
@ -271,9 +267,8 @@ void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
|
||||||
|
|
||||||
Builder.SetInsertPoint(InsertBefore);
|
Builder.SetInsertPoint(InsertBefore);
|
||||||
Function *RDTSCPFn = getRDTSCP();
|
Function *RDTSCPFn = getRDTSCP();
|
||||||
Value *CurrentCycles = Builder.CreateCall(
|
Value *CurrentCycles =
|
||||||
RDTSCPFn,
|
Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
|
||||||
Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
|
|
||||||
Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
|
Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -284,9 +279,8 @@ void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
|
||||||
Builder.SetInsertPoint(InsertBefore);
|
Builder.SetInsertPoint(InsertBefore);
|
||||||
Function *RDTSCPFn = getRDTSCP();
|
Function *RDTSCPFn = getRDTSCP();
|
||||||
LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true);
|
LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true);
|
||||||
Value *CurrentCycles = Builder.CreateCall(
|
Value *CurrentCycles =
|
||||||
RDTSCPFn,
|
Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0});
|
||||||
Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
|
|
||||||
Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
|
Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
|
||||||
Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
|
Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
|
||||||
CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
|
CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
|
||||||
|
|
|
@ -36,35 +36,37 @@ return:
|
||||||
; CHECK-NEXT: @__polly_perf_initialized = weak thread_local(initialexec) constant i1 false
|
; CHECK-NEXT: @__polly_perf_initialized = weak thread_local(initialexec) constant i1 false
|
||||||
; CHECK-NEXT: @__polly_perf_cycles_in_scops = weak thread_local(initialexec) constant i64 0
|
; CHECK-NEXT: @__polly_perf_cycles_in_scops = weak thread_local(initialexec) constant i64 0
|
||||||
; CHECK-NEXT: @__polly_perf_cycles_in_scop_start = weak thread_local(initialexec) constant i64 0
|
; CHECK-NEXT: @__polly_perf_cycles_in_scop_start = weak thread_local(initialexec) constant i64 0
|
||||||
; CHECK-NEXT: @__polly_perf_write_loation = weak thread_local(initialexec) constant i32 0
|
|
||||||
|
|
||||||
; CHECK: polly.split_new_and_old: ; preds = %entry
|
; CHECK: polly.split_new_and_old: ; preds = %entry
|
||||||
; CHECK-NEXT: %0 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
|
; CHECK-NEXT: %0 = call { i64, i32 } @llvm.x86.rdtscp()
|
||||||
; CHECK-NEXT: store volatile i64 %0, i64* @__polly_perf_cycles_in_scop_start
|
; CHECK-NEXT: %1 = extractvalue { i64, i32 } %0, 0
|
||||||
|
; CHECK-NEXT: store volatile i64 %1, i64* @__polly_perf_cycles_in_scop_start
|
||||||
|
|
||||||
; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
|
; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
|
||||||
; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
|
; CHECK-NEXT: %6 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
|
||||||
; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
|
; CHECK-NEXT: %7 = call { i64, i32 } @llvm.x86.rdtscp()
|
||||||
; CHECK-NEXT: %7 = sub i64 %6, %5
|
; CHECK-NEXT: %8 = extractvalue { i64, i32 } %7, 0
|
||||||
; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
|
; CHECK-NEXT: %9 = sub i64 %8, %6
|
||||||
; CHECK-NEXT: %9 = add i64 %8, %7
|
; CHECK-NEXT: %10 = load volatile i64, i64* @__polly_perf_cycles_in_scops
|
||||||
; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
|
; CHECK-NEXT: %11 = add i64 %10, %9
|
||||||
|
; CHECK-NEXT: store volatile i64 %11, i64* @__polly_perf_cycles_in_scops
|
||||||
|
|
||||||
|
|
||||||
; CHECK: define weak_odr void @__polly_perf_final() {
|
; CHECK: define weak_odr void @__polly_perf_final() {
|
||||||
; CHECK-NEXT: start:
|
; CHECK-NEXT: start:
|
||||||
; CHECK-NEXT: %0 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
|
; CHECK-NEXT: %0 = call { i64, i32 } @llvm.x86.rdtscp()
|
||||||
; CHECK-NEXT: %1 = load volatile i64, i64* @__polly_perf_cycles_total_start
|
; CHECK-NEXT: %1 = extractvalue { i64, i32 } %0, 0
|
||||||
; CHECK-NEXT: %2 = sub i64 %0, %1
|
; CHECK-NEXT: %2 = load volatile i64, i64* @__polly_perf_cycles_total_start
|
||||||
; CHECK-NEXT: %3 = load volatile i64, i64* @__polly_perf_cycles_in_scops
|
; CHECK-NEXT: %3 = sub i64 %1, %2
|
||||||
; CHECK-NEXT: %4 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @1, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @0, i32 0, i32 0))
|
; CHECK-NEXT: %4 = load volatile i64, i64* @__polly_perf_cycles_in_scops
|
||||||
; CHECK-NEXT: %5 = call i32 @fflush(i8* null)
|
; CHECK-NEXT: %5 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @1, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @0, i32 0, i32 0))
|
||||||
; CHECK-NEXT: %6 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @3, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @2, i32 0, i32 0))
|
; CHECK-NEXT: %6 = call i32 @fflush(i8* null)
|
||||||
; CHECK-NEXT: %7 = call i32 @fflush(i8* null)
|
; CHECK-NEXT: %7 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @3, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @2, i32 0, i32 0))
|
||||||
; CHECK-NEXT: %8 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @6, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @4, i32 0, i32 0), i64 %2, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @5, i32 0, i32 0))
|
; CHECK-NEXT: %8 = call i32 @fflush(i8* null)
|
||||||
; CHECK-NEXT: %9 = call i32 @fflush(i8* null)
|
; CHECK-NEXT: %9 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @6, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @4, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @5, i32 0, i32 0))
|
||||||
; CHECK-NEXT: %10 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))
|
; CHECK-NEXT: %10 = call i32 @fflush(i8* null)
|
||||||
; CHECK-NEXT: %11 = call i32 @fflush(i8* null)
|
; CHECK-NEXT: %11 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %4, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))
|
||||||
|
; CHECK-NEXT: %12 = call i32 @fflush(i8* null)
|
||||||
|
|
||||||
|
|
||||||
; CHECK: define weak_odr void @__polly_perf_init() {
|
; CHECK: define weak_odr void @__polly_perf_init() {
|
||||||
|
@ -78,7 +80,8 @@ return:
|
||||||
; CHECK: initbb: ; preds = %start
|
; CHECK: initbb: ; preds = %start
|
||||||
; CHECK-NEXT: store i1 true, i1* @__polly_perf_initialized
|
; CHECK-NEXT: store i1 true, i1* @__polly_perf_initialized
|
||||||
; CHECK-NEXT: %1 = call i32 @atexit(i8* bitcast (void ()* @__polly_perf_final to i8*))
|
; CHECK-NEXT: %1 = call i32 @atexit(i8* bitcast (void ()* @__polly_perf_final to i8*))
|
||||||
; CHECK-NEXT: %2 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
|
; CHECK-NEXT: %2 = call { i64, i32 } @llvm.x86.rdtscp()
|
||||||
; CHECK-NEXT: store volatile i64 %2, i64* @__polly_perf_cycles_total_start
|
; CHECK-NEXT: %3 = extractvalue { i64, i32 } %2, 0
|
||||||
|
; CHECK-NEXT: store volatile i64 %3, i64* @__polly_perf_cycles_total_start
|
||||||
; CHECK-NEXT: ret void
|
; CHECK-NEXT: ret void
|
||||||
; CHECK-NEXT: }
|
; CHECK-NEXT: }
|
||||||
|
|
|
@ -65,11 +65,11 @@ return:
|
||||||
; @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
|
; @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
|
||||||
|
|
||||||
; Bumping up number of cycles in f
|
; Bumping up number of cycles in f
|
||||||
; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
|
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
|
||||||
; CHECK-NEXT: %11 = add i64 %10, %7
|
; CHECK-NEXT: %13 = add i64 %12, %9
|
||||||
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
|
; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
|
||||||
|
|
||||||
; Bumping up number of cycles in g
|
; Bumping up number of cycles in g
|
||||||
; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
|
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
|
||||||
; CHECK-NEXT: %11 = add i64 %10, %7
|
; CHECK-NEXT: %13 = add i64 %12, %9
|
||||||
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
|
; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
|
||||||
|
|
|
@ -65,11 +65,11 @@ return:
|
||||||
; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
|
; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
|
||||||
|
|
||||||
; Bumping up number of cycles in f
|
; Bumping up number of cycles in f
|
||||||
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
|
; CHECK: %14 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
|
||||||
; CHECK-NEXT: %13 = add i64 %12, 1
|
; CHECK-NEXT: %15 = add i64 %14, 1
|
||||||
; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
|
; CHECK-NEXT: store volatile i64 %15, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
|
||||||
|
|
||||||
; Bumping up number of cycles in g
|
; Bumping up number of cycles in g
|
||||||
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
|
; CHECK: %14 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
|
||||||
; CHECK-NEXT: %13 = add i64 %12, 1
|
; CHECK-NEXT: %15 = add i64 %14, 1
|
||||||
; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
|
; CHECK-NEXT: store volatile i64 %15, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
|
||||||
|
|
Loading…
Reference in New Issue