forked from mindspore-Ecosystem/mindspore
Fix the issue where repeated initialization can't exit.
This commit is contained in:
parent
42111a8033
commit
5c25cb6f0c
|
@ -240,38 +240,6 @@ void GPUProfiler::EventLog(const Event &event) {
|
||||||
<< ",stream_id:" << event.stream_id << ",cb_id:" << event.cb_id;
|
<< ",stream_id:" << event.stream_id << ",cb_id:" << event.cb_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
void fillActivityInfo(OpInfo *opInfo, const Event &event) {
|
|
||||||
if (event.api_type != CUPTIApiType::kActivity) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
switch (event.activity_type) {
|
|
||||||
case ActivityType::kKernel:
|
|
||||||
opInfo->kernel_info.registers_per_thread = event.kernel_info.registers_per_thread;
|
|
||||||
opInfo->kernel_info.static_shared_memory = event.kernel_info.static_shared_memory;
|
|
||||||
opInfo->kernel_info.dynamic_shared_memory = event.kernel_info.dynamic_shared_memory;
|
|
||||||
opInfo->kernel_info.block_x = event.kernel_info.block_x;
|
|
||||||
opInfo->kernel_info.block_y = event.kernel_info.block_y;
|
|
||||||
opInfo->kernel_info.block_z = event.kernel_info.block_z;
|
|
||||||
opInfo->kernel_info.grid_x = event.kernel_info.grid_x;
|
|
||||||
opInfo->kernel_info.grid_y = event.kernel_info.grid_y;
|
|
||||||
opInfo->kernel_info.grid_z = event.kernel_info.grid_z;
|
|
||||||
break;
|
|
||||||
case ActivityType::kMemcpyH2D:
|
|
||||||
case ActivityType::kMemcpyD2H:
|
|
||||||
case ActivityType::kMemcpyH2A:
|
|
||||||
case ActivityType::kMemcpyA2H:
|
|
||||||
case ActivityType::kMemcpyA2D:
|
|
||||||
case ActivityType::kMemcpyD2A:
|
|
||||||
case ActivityType::kMemcpyP2P:
|
|
||||||
case ActivityType::kMemcpyH2H:
|
|
||||||
case ActivityType::kMemset:
|
|
||||||
case ActivityType::kMemcpyUnknown:
|
|
||||||
opInfo->memcpy_info.bytes = event.memcpy_info.bytes;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GPUProfiler::OpsParser() {
|
void GPUProfiler::OpsParser() {
|
||||||
MS_LOG(INFO) << "Count the number of events size:" << events_.size()
|
MS_LOG(INFO) << "Count the number of events size:" << events_.size()
|
||||||
<< " callback api:" << cupti_callback_events_count_ << " activity:" << cupti_activity_events_count_;
|
<< " callback api:" << cupti_callback_events_count_ << " activity:" << cupti_activity_events_count_;
|
||||||
|
@ -311,7 +279,6 @@ void GPUProfiler::OpsParser() {
|
||||||
iter->second.op_kernel_count += 1;
|
iter->second.op_kernel_count += 1;
|
||||||
// The time unit from ns to us
|
// The time unit from ns to us
|
||||||
iter->second.cupti_activity_time += (event.end_time_stamp - event.start_time_stamp) / kTimeUnit;
|
iter->second.cupti_activity_time += (event.end_time_stamp - event.start_time_stamp) / kTimeUnit;
|
||||||
fillActivityInfo(&iter->second, event);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -322,9 +289,7 @@ void GPUProfiler::OpsParser() {
|
||||||
|
|
||||||
MS_LOG(DEBUG) << "GPU_profiler, op_name, op_count , kernel_count, kernel_api_count,|"
|
MS_LOG(DEBUG) << "GPU_profiler, op_name, op_count , kernel_count, kernel_api_count,|"
|
||||||
",cupti_activity_total_time, cupti_api_call_total_time, op_host_cost_total_time,|"
|
",cupti_activity_total_time, cupti_api_call_total_time, op_host_cost_total_time,|"
|
||||||
",cupti_activity_average_time,cupti_api_call_average_time, op_host_cost_average_time,|"
|
",cupti_activity_average_time,cupti_api_call_average_time, op_host_cost_average_time"
|
||||||
",mem_bytes,registers_per_thread,static_shared_memory,dynamic_shared_memory"
|
|
||||||
",block_x,block_y,block_z,grid_x,grid_y,grid_z"
|
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
|
|
||||||
std::vector<std::pair<std::string, OpInfo>> order_vec(op_info_map_.begin(), op_info_map_.end());
|
std::vector<std::pair<std::string, OpInfo>> order_vec(op_info_map_.begin(), op_info_map_.end());
|
||||||
|
@ -342,13 +307,7 @@ void GPUProfiler::OpsParser() {
|
||||||
<< iter->second.op_host_cost_time << ","
|
<< iter->second.op_host_cost_time << ","
|
||||||
<< "|," << round(iter->second.cupti_activity_time / iter->second.op_count) << ","
|
<< "|," << round(iter->second.cupti_activity_time / iter->second.op_count) << ","
|
||||||
<< round(iter->second.cupti_api_call_time / iter->second.op_count) << ","
|
<< round(iter->second.cupti_api_call_time / iter->second.op_count) << ","
|
||||||
<< round(iter->second.op_host_cost_time / iter->second.op_count) << ","
|
<< round(iter->second.op_host_cost_time / iter->second.op_count) << std::endl;
|
||||||
<< "|," << iter->second.memcpy_info.bytes << "," << iter->second.kernel_info.registers_per_thread
|
|
||||||
<< "," << iter->second.kernel_info.static_shared_memory << ","
|
|
||||||
<< iter->second.kernel_info.dynamic_shared_memory << "," << iter->second.kernel_info.block_x << ","
|
|
||||||
<< iter->second.kernel_info.block_y << "," << iter->second.kernel_info.block_z << ","
|
|
||||||
<< iter->second.kernel_info.grid_x << "," << iter->second.kernel_info.grid_y << ","
|
|
||||||
<< iter->second.kernel_info.grid_z << std::endl;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -379,6 +338,11 @@ void CUPTIAPI ActivityProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *b
|
||||||
|
|
||||||
void GPUProfiler::Init(const std::string &profileDataPath = "") {
|
void GPUProfiler::Init(const std::string &profileDataPath = "") {
|
||||||
MS_LOG(INFO) << "Initialize GPU Profiling";
|
MS_LOG(INFO) << "Initialize GPU Profiling";
|
||||||
|
if (subscriber_ != nullptr) {
|
||||||
|
StopCUPTI();
|
||||||
|
MS_LOG(EXCEPTION)
|
||||||
|
<< "Repeated initialization, Please check whether you have created the Profiler object multiple times";
|
||||||
|
}
|
||||||
CHECK_CUPTI_RET_WITH_EXCEPT(CuptiSubscribe(&subscriber_, (CUpti_CallbackFunc)CUPTICallBackFunc, this),
|
CHECK_CUPTI_RET_WITH_EXCEPT(CuptiSubscribe(&subscriber_, (CUpti_CallbackFunc)CUPTICallBackFunc, this),
|
||||||
"CuptiSubscribe");
|
"CuptiSubscribe");
|
||||||
CHECK_CUPTI_RET_WITH_EXCEPT(CuptiEnableDomain(1, subscriber_, CUPTI_CB_DOMAIN_DRIVER_API), "CuptiEnableDomain");
|
CHECK_CUPTI_RET_WITH_EXCEPT(CuptiEnableDomain(1, subscriber_, CUPTI_CB_DOMAIN_DRIVER_API), "CuptiEnableDomain");
|
||||||
|
@ -516,137 +480,137 @@ void CUPTIAPI ActivityProcessBuffer(CUcontext ctx, uint32_t streamId, uint8_t *b
|
||||||
GPUProfiler::GetInstance()->ProcessBuffer(ctx, streamId, buffer, size, validSize);
|
GPUProfiler::GetInstance()->ProcessBuffer(ctx, streamId, buffer, size, validSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HandleActivityMemcpyRecord(Event *profillingData, CUpti_Activity *record) {
|
void HandleActivityMemcpyRecord(Event *profilingData, CUpti_Activity *record) {
|
||||||
CUpti_ActivityMemcpy *memcpy = reinterpret_cast<CUpti_ActivityMemcpy *>(record);
|
CUpti_ActivityMemcpy *memcpy = reinterpret_cast<CUpti_ActivityMemcpy *>(record);
|
||||||
switch (memcpy->copyKind) {
|
switch (memcpy->copyKind) {
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_HTOD:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyH2D;
|
profilingData->activity_type = ActivityType::kMemcpyH2D;
|
||||||
profillingData->kernel_name = "MemcpyH2D";
|
profilingData->kernel_name = "MemcpyH2D";
|
||||||
break;
|
break;
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_DTOH:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyD2H;
|
profilingData->activity_type = ActivityType::kMemcpyD2H;
|
||||||
profillingData->kernel_name = "MemcpyD2H";
|
profilingData->kernel_name = "MemcpyD2H";
|
||||||
break;
|
break;
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_HTOA:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyH2A;
|
profilingData->activity_type = ActivityType::kMemcpyH2A;
|
||||||
profillingData->kernel_name = "MemcpyH2A";
|
profilingData->kernel_name = "MemcpyH2A";
|
||||||
break;
|
break;
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_ATOH:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyA2H;
|
profilingData->activity_type = ActivityType::kMemcpyA2H;
|
||||||
profillingData->kernel_name = "MemcpyA2H";
|
profilingData->kernel_name = "MemcpyA2H";
|
||||||
break;
|
break;
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_ATOD:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyA2D;
|
profilingData->activity_type = ActivityType::kMemcpyA2D;
|
||||||
profillingData->kernel_name = "MemcpyA2D";
|
profilingData->kernel_name = "MemcpyA2D";
|
||||||
break;
|
break;
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_DTOA:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyD2A;
|
profilingData->activity_type = ActivityType::kMemcpyD2A;
|
||||||
profillingData->kernel_name = "MemcpyD2A";
|
profilingData->kernel_name = "MemcpyD2A";
|
||||||
break;
|
break;
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_DTOD:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyD2D;
|
profilingData->activity_type = ActivityType::kMemcpyD2D;
|
||||||
profillingData->kernel_name = "MemcpyD2D";
|
profilingData->kernel_name = "MemcpyD2D";
|
||||||
break;
|
break;
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_HTOH:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyH2H;
|
profilingData->activity_type = ActivityType::kMemcpyH2H;
|
||||||
profillingData->kernel_name = "MemcpyH2H";
|
profilingData->kernel_name = "MemcpyH2H";
|
||||||
break;
|
break;
|
||||||
case CUPTI_ACTIVITY_MEMCPY_KIND_PTOP:
|
case CUPTI_ACTIVITY_MEMCPY_KIND_PTOP:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyP2P;
|
profilingData->activity_type = ActivityType::kMemcpyP2P;
|
||||||
profillingData->kernel_name = "MemcpyP2P";
|
profilingData->kernel_name = "MemcpyP2P";
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
profillingData->activity_type = ActivityType::kMemcpyUnknown;
|
profilingData->activity_type = ActivityType::kMemcpyUnknown;
|
||||||
profillingData->kernel_name = "MemcpyUnknown";
|
profilingData->kernel_name = "MemcpyUnknown";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
profillingData->kernel_type = "cuMemcpy";
|
profilingData->kernel_type = "cuMemcpy";
|
||||||
profillingData->api_type = CUPTIApiType::kActivity;
|
profilingData->api_type = CUPTIApiType::kActivity;
|
||||||
profillingData->start_time_stamp = memcpy->start;
|
profilingData->start_time_stamp = memcpy->start;
|
||||||
profillingData->end_time_stamp = memcpy->end;
|
profilingData->end_time_stamp = memcpy->end;
|
||||||
profillingData->device_id = memcpy->deviceId;
|
profilingData->device_id = memcpy->deviceId;
|
||||||
profillingData->context_id = memcpy->contextId;
|
profilingData->context_id = memcpy->contextId;
|
||||||
profillingData->stream_id = memcpy->streamId;
|
profilingData->stream_id = memcpy->streamId;
|
||||||
profillingData->correlation_id = memcpy->correlationId;
|
profilingData->correlation_id = memcpy->correlationId;
|
||||||
profillingData->memcpy_info.bytes = memcpy->bytes;
|
profilingData->memcpy_info.bytes = memcpy->bytes;
|
||||||
profillingData->memcpy_info.src_kind = memcpy->srcKind;
|
profilingData->memcpy_info.src_kind = memcpy->srcKind;
|
||||||
profillingData->memcpy_info.dst_kind = memcpy->dstKind;
|
profilingData->memcpy_info.dst_kind = memcpy->dstKind;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HandleActivityMemcpy2Record(Event *profillingData, CUpti_Activity *record) {
|
void HandleActivityMemcpy2Record(Event *profilingData, CUpti_Activity *record) {
|
||||||
CUpti_ActivityMemcpy2 *memcpyP2P = reinterpret_cast<CUpti_ActivityMemcpy2 *>(record);
|
CUpti_ActivityMemcpy2 *memcpyP2P = reinterpret_cast<CUpti_ActivityMemcpy2 *>(record);
|
||||||
profillingData->activity_type = ActivityType::kMemcpyP2P;
|
profilingData->activity_type = ActivityType::kMemcpyP2P;
|
||||||
profillingData->kernel_name = "MemcpyP2P";
|
profilingData->kernel_name = "MemcpyP2P";
|
||||||
profillingData->kernel_type = "cuMemcpy";
|
profilingData->kernel_type = "cuMemcpy";
|
||||||
profillingData->api_type = CUPTIApiType::kActivity;
|
profilingData->api_type = CUPTIApiType::kActivity;
|
||||||
profillingData->start_time_stamp = memcpyP2P->start;
|
profilingData->start_time_stamp = memcpyP2P->start;
|
||||||
profillingData->end_time_stamp = memcpyP2P->end;
|
profilingData->end_time_stamp = memcpyP2P->end;
|
||||||
profillingData->device_id = memcpyP2P->deviceId;
|
profilingData->device_id = memcpyP2P->deviceId;
|
||||||
profillingData->context_id = memcpyP2P->contextId;
|
profilingData->context_id = memcpyP2P->contextId;
|
||||||
profillingData->stream_id = memcpyP2P->streamId;
|
profilingData->stream_id = memcpyP2P->streamId;
|
||||||
profillingData->correlation_id = memcpyP2P->correlationId;
|
profilingData->correlation_id = memcpyP2P->correlationId;
|
||||||
profillingData->memcpy_info.bytes = memcpyP2P->bytes;
|
profilingData->memcpy_info.bytes = memcpyP2P->bytes;
|
||||||
profillingData->memcpy_info.src_kind = memcpyP2P->srcKind;
|
profilingData->memcpy_info.src_kind = memcpyP2P->srcKind;
|
||||||
profillingData->memcpy_info.dst_kind = memcpyP2P->dstKind;
|
profilingData->memcpy_info.dst_kind = memcpyP2P->dstKind;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HandleActivityMemsetRecord(Event *profillingData, CUpti_Activity *record) {
|
void HandleActivityMemsetRecord(Event *profilingData, CUpti_Activity *record) {
|
||||||
CUpti_ActivityMemset *memset = reinterpret_cast<CUpti_ActivityMemset *>(record);
|
CUpti_ActivityMemset *memset = reinterpret_cast<CUpti_ActivityMemset *>(record);
|
||||||
profillingData->activity_type = ActivityType::kMemset;
|
profilingData->activity_type = ActivityType::kMemset;
|
||||||
profillingData->kernel_name = "MemorySet";
|
profilingData->kernel_name = "MemorySet";
|
||||||
profillingData->api_type = CUPTIApiType::kActivity;
|
profilingData->api_type = CUPTIApiType::kActivity;
|
||||||
profillingData->start_time_stamp = memset->start;
|
profilingData->start_time_stamp = memset->start;
|
||||||
profillingData->end_time_stamp = memset->end;
|
profilingData->end_time_stamp = memset->end;
|
||||||
profillingData->device_id = memset->deviceId;
|
profilingData->device_id = memset->deviceId;
|
||||||
profillingData->context_id = memset->contextId;
|
profilingData->context_id = memset->contextId;
|
||||||
profillingData->stream_id = memset->streamId;
|
profilingData->stream_id = memset->streamId;
|
||||||
profillingData->correlation_id = memset->correlationId;
|
profilingData->correlation_id = memset->correlationId;
|
||||||
profillingData->memcpy_info.bytes = memset->bytes;
|
profilingData->memcpy_info.bytes = memset->bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HandleActivityKernelRecord(Event *profillingData, CUpti_Activity *record) {
|
void HandleActivityKernelRecord(Event *profilingData, CUpti_Activity *record) {
|
||||||
CUpti_ActivityKernel4 *kernel = reinterpret_cast<CUpti_ActivityKernel4 *>(record);
|
CUpti_ActivityKernel4 *kernel = reinterpret_cast<CUpti_ActivityKernel4 *>(record);
|
||||||
profillingData->activity_type = ActivityType::kKernel;
|
profilingData->activity_type = ActivityType::kKernel;
|
||||||
profillingData->api_type = CUPTIApiType::kActivity;
|
profilingData->api_type = CUPTIApiType::kActivity;
|
||||||
profillingData->kernel_name = GetKernelFunc(kernel->name);
|
profilingData->kernel_name = GetKernelFunc(kernel->name);
|
||||||
profillingData->kernel_type = "cuLaunchKernel";
|
profilingData->kernel_type = "cuLaunchKernel";
|
||||||
profillingData->start_time_stamp = kernel->start;
|
profilingData->start_time_stamp = kernel->start;
|
||||||
profillingData->end_time_stamp = kernel->end;
|
profilingData->end_time_stamp = kernel->end;
|
||||||
profillingData->device_id = kernel->deviceId;
|
profilingData->device_id = kernel->deviceId;
|
||||||
profillingData->context_id = kernel->contextId;
|
profilingData->context_id = kernel->contextId;
|
||||||
profillingData->stream_id = kernel->streamId;
|
profilingData->stream_id = kernel->streamId;
|
||||||
profillingData->correlation_id = kernel->correlationId;
|
profilingData->correlation_id = kernel->correlationId;
|
||||||
profillingData->kernel_info.registers_per_thread = kernel->registersPerThread;
|
profilingData->kernel_info.registers_per_thread = kernel->registersPerThread;
|
||||||
profillingData->kernel_info.static_shared_memory = kernel->staticSharedMemory;
|
profilingData->kernel_info.static_shared_memory = kernel->staticSharedMemory;
|
||||||
profillingData->kernel_info.dynamic_shared_memory = kernel->dynamicSharedMemory;
|
profilingData->kernel_info.dynamic_shared_memory = kernel->dynamicSharedMemory;
|
||||||
profillingData->kernel_info.block_x = kernel->blockX;
|
profilingData->kernel_info.block_x = kernel->blockX;
|
||||||
profillingData->kernel_info.block_y = kernel->blockY;
|
profilingData->kernel_info.block_y = kernel->blockY;
|
||||||
profillingData->kernel_info.block_z = kernel->blockZ;
|
profilingData->kernel_info.block_z = kernel->blockZ;
|
||||||
profillingData->kernel_info.grid_x = kernel->gridX;
|
profilingData->kernel_info.grid_x = kernel->gridX;
|
||||||
profillingData->kernel_info.grid_y = kernel->gridY;
|
profilingData->kernel_info.grid_y = kernel->gridY;
|
||||||
profillingData->kernel_info.grid_z = kernel->gridZ;
|
profilingData->kernel_info.grid_z = kernel->gridZ;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUProfiler::HandleActivityRecord(CUpti_Activity *record) {
|
void GPUProfiler::HandleActivityRecord(CUpti_Activity *record) {
|
||||||
PROFILER_ERROR_IF_NULLPTR(record);
|
PROFILER_ERROR_IF_NULLPTR(record);
|
||||||
Event profillingData;
|
Event profilingData;
|
||||||
profillingData.cb_id = 0;
|
profilingData.cb_id = 0;
|
||||||
switch (record->kind) {
|
switch (record->kind) {
|
||||||
case CUPTI_ACTIVITY_KIND_MEMCPY: {
|
case CUPTI_ACTIVITY_KIND_MEMCPY: {
|
||||||
HandleActivityMemcpyRecord(&profillingData, record);
|
HandleActivityMemcpyRecord(&profilingData, record);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CUPTI_ACTIVITY_KIND_MEMCPY2: {
|
case CUPTI_ACTIVITY_KIND_MEMCPY2: {
|
||||||
HandleActivityMemcpy2Record(&profillingData, record);
|
HandleActivityMemcpy2Record(&profilingData, record);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CUPTI_ACTIVITY_KIND_MEMSET: {
|
case CUPTI_ACTIVITY_KIND_MEMSET: {
|
||||||
HandleActivityMemsetRecord(&profillingData, record);
|
HandleActivityMemsetRecord(&profilingData, record);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CUPTI_ACTIVITY_KIND_KERNEL:
|
case CUPTI_ACTIVITY_KIND_KERNEL:
|
||||||
case CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL: {
|
case CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL: {
|
||||||
HandleActivityKernelRecord(&profillingData, record);
|
HandleActivityKernelRecord(&profilingData, record);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -654,7 +618,7 @@ void GPUProfiler::HandleActivityRecord(CUpti_Activity *record) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
AddEvent(std::move(profillingData));
|
AddEvent(std::move(profilingData));
|
||||||
}
|
}
|
||||||
|
|
||||||
void CUPTIAPI GPUProfiler::AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords) {
|
void CUPTIAPI GPUProfiler::AllocBuffer(uint8_t **buffer, size_t *size, size_t *maxNumRecords) {
|
||||||
|
|
|
@ -93,9 +93,6 @@ struct OpInfo {
|
||||||
int op_kernel_count = 0;
|
int op_kernel_count = 0;
|
||||||
int op_count = 0;
|
int op_count = 0;
|
||||||
void *stream;
|
void *stream;
|
||||||
|
|
||||||
MemcpyInfo memcpy_info = {0};
|
|
||||||
KernelInfo kernel_info = {0};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BaseTime {
|
struct BaseTime {
|
||||||
|
|
Loading…
Reference in New Issue