!26541 Fix file name and field type changes generated by HCCL in profiler.

Merge pull request !26541 from casgj/master_hccl
This commit is contained in:
i-robot 2021-11-23 13:16:28 +00:00 committed by Gitee
commit 875f35d6d8
5 changed files with 865 additions and 9 deletions

View File

@ -177,8 +177,7 @@ class HcclParser:
"""Get the name of communication operators mapping between hccl and step trace."""
dir_path = self._validate_dir_path(self._source_dir)
# The name of the operator in hccl is like: operatorName_{Ordered_number}_xx_xx.
operators_names_in_hccl = [entry.name for entry in os.scandir(dir_path) if entry.is_dir()
and entry.name.endswith(self._dev_id)]
operators_names_in_hccl = [entry.name for entry in os.scandir(dir_path) if entry.is_dir()]
operators_names_in_hccl_set = set({i.split('_')[0] for i in operators_names_in_hccl})
op_names_in_hccl_dic = dict()
for item in operators_names_in_hccl_set:
@ -226,8 +225,7 @@ class HcclParser:
"""Obtain time-consuming information of all communication operators."""
operators_cost_info = dict()
dir_path = self._validate_dir_path(dir_path)
operators_dir = [entry.name for entry in os.scandir(dir_path) if entry.is_dir()
and entry.name.endswith(self._dev_id)]
operators_dir = [entry.name for entry in os.scandir(dir_path) if entry.is_dir()]
operator_dir_path = [os.path.join(dir_path, operator_dir) for operator_dir in operators_dir]
for operator_dir in operator_dir_path:
operator_cost = self._calculate_communication_operator_cost(operator_dir)
@ -438,9 +436,16 @@ class HcclParser:
rdma_communication_time += rdma_send_cost + notify_record_cost + notify_wait_cost
rdma_communication_wait_time += notify_wait_cost
rdma_size = trace_event[start_index].get("args").get("size")
rdma_size = int(rdma_size, 16) if rdma_size else 0
if rdma_size:
rdma_size = rdma_size if isinstance(rdma_size, int) else int(rdma_size, 16)
else:
rdma_size = 0
notify_record_size = trace_event[start_index + 1].get("args").get("size")
notify_record_size = int(notify_record_size, 16) if notify_record_size else 0
if notify_record_size:
notify_record_size = notify_record_size if isinstance(notify_record_size, int) \
else int(notify_record_size, 16)
else:
notify_record_size = 0
rdma_communication_size += rdma_size + notify_record_size
start_index += 2
start_index += 1
@ -470,7 +475,12 @@ class HcclParser:
task_type = item.get("args").get("task type")
if task_type in (CommunicationInfo.REDUCE_INLINE.value, CommunicationInfo.MEMCPY.value):
sdma_communication_time += item.get("dur", 0)
sdma_size = int(item.get("args").get("size"), 16) if item.get("args").get("size") else 0
sdma_size = item.get("args").get("size")
if sdma_size:
sdma_size = sdma_size if isinstance(sdma_size, int) else int(sdma_size, 16)
else:
sdma_size = 0
sdma_communication_size += sdma_size
# The unit of sdma_bandwidth is KB/s.

View File

@ -0,0 +1,424 @@
{
"device id": "6",
"iteration": 1,
"traceEvents": [
{
"tid": 2,
"pid": "6",
"ts": 616881090071.61,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000090",
"duration estimated": 0.26,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 7,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881090071.97,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000090",
"duration estimated": 0.01,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 8,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881090072.08,
"dur": 0.25,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000050",
"duration estimated": 0.25,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 9,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881090072.44,
"dur": 0.0,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000050",
"duration estimated": 0.0,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 10,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881090072.55,
"dur": 2183.31,
"ph": "X",
"name": "Reduce Inline",
"args": {
"notify id": 0,
"duration estimated": 2183.31,
"stage": "0",
"step": "0",
"bandwidth": 21.61,
"stream id": 24,
"task id": 11,
"task type": "Reduce Inline",
"src rank": 1,
"dst rank": 0,
"transport type": "SDMA",
"size": 47178496
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092255.97,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000090",
"duration estimated": 0.26,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 12,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092256.34,
"dur": 0.0,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000090",
"duration estimated": 0.0,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 13,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092256.45,
"dur": 0.25,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000050",
"duration estimated": 0.25,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 14,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092256.81,
"dur": 0.0,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000050",
"duration estimated": 0.0,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 15,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092256.92,
"dur": 150.71,
"ph": "X",
"name": "Memcpy",
"args": {
"notify id": 0,
"duration estimated": 150.71,
"stage": "0",
"step": "0",
"bandwidth": 313.04,
"stream id": 24,
"task id": 16,
"task type": "Memcpy",
"src rank": 4294967295,
"dst rank": 0,
"transport type": "SDMA",
"size": 47178496
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092407.73,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000090",
"duration estimated": 0.26,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 17,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092408.09,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000090",
"duration estimated": 0.01,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 18,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092408.2,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000050",
"duration estimated": 0.26,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 19,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092408.56,
"dur": 0.0,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000050",
"duration estimated": 0.0,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 20,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881092408.67,
"dur": 2182.35,
"ph": "X",
"name": "Memcpy",
"args": {
"notify id": 0,
"duration estimated": 2182.35,
"stage": "2",
"step": "0",
"bandwidth": 21.62,
"stream id": 24,
"task id": 21,
"task type": "Memcpy",
"src rank": 1,
"dst rank": 0,
"transport type": "SDMA",
"size": 47178496
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881094591.12,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000090",
"duration estimated": 0.26,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 22,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881094591.48,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000090",
"duration estimated": 0.01,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 23,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881094591.59,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000050",
"duration estimated": 0.26,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 24,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881094591.95,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000050",
"duration estimated": 0.01,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 25,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
}
]
}

View File

@ -0,0 +1,424 @@
{
"device id": "6",
"iteration": 2,
"traceEvents": [
{
"tid": 2,
"pid": "6",
"ts": 616881123726.78,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000090",
"duration estimated": 0.26,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 7,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881123727.14,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000090",
"duration estimated": 0.01,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 8,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881123727.25,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000050",
"duration estimated": 0.26,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 9,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881123727.61,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000050",
"duration estimated": 0.01,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 10,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881123727.72,
"dur": 2186.19,
"ph": "X",
"name": "Reduce Inline",
"args": {
"notify id": 0,
"duration estimated": 2186.19,
"stage": "0",
"step": "0",
"bandwidth": 21.58,
"stream id": 24,
"task id": 11,
"task type": "Reduce Inline",
"src rank": 1,
"dst rank": 0,
"transport type": "SDMA",
"size": 47178496
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881125914.01,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000090",
"duration estimated": 0.26,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 12,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881125914.37,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000090",
"duration estimated": 0.01,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 13,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881125914.48,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000050",
"duration estimated": 0.26,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 14,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881125914.84,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000050",
"duration estimated": 0.01,
"stage": "0",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 15,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881125914.95,
"dur": 150.81,
"ph": "X",
"name": "Memcpy",
"args": {
"notify id": 0,
"duration estimated": 150.81,
"stage": "0",
"step": "0",
"bandwidth": 312.83,
"stream id": 24,
"task id": 16,
"task type": "Memcpy",
"src rank": 4294967295,
"dst rank": 0,
"transport type": "SDMA",
"size": 47178496
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881126065.86,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000090",
"duration estimated": 0.26,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 17,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881126066.22,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000090",
"duration estimated": 0.01,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 18,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881126066.33,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000050",
"duration estimated": 0.26,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 19,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881126066.7,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000050",
"duration estimated": 0.01,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 20,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881126066.85,
"dur": 2182.26,
"ph": "X",
"name": "Memcpy",
"args": {
"notify id": 0,
"duration estimated": 2182.26,
"stage": "2",
"step": "0",
"bandwidth": 21.62,
"stream id": 24,
"task id": 21,
"task type": "Memcpy",
"src rank": 1,
"dst rank": 0,
"transport type": "SDMA",
"size": 47178496
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881128249.22,
"dur": 0.26,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000090",
"duration estimated": 0.26,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 22,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881128249.59,
"dur": 0.0,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000090",
"duration estimated": 0.0,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 23,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881128249.71,
"dur": 0.25,
"ph": "X",
"name": "Notify Record",
"args": {
"notify id": "0x0000000100000050",
"duration estimated": 0.25,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 24,
"task type": "Notify Record",
"src rank": 0,
"dst rank": 1,
"transport type": "SDMA",
"size": null
}
},
{
"tid": 2,
"pid": "6",
"ts": 616881128250.07,
"dur": 0.01,
"ph": "X",
"name": "Notify Wait",
"args": {
"notify id": "0x0000000000000050",
"duration estimated": 0.01,
"stage": "2",
"step": "0",
"bandwidth": "NULL",
"stream id": 24,
"task id": 25,
"task type": "Notify Wait",
"src rank": 1,
"dst rank": 0,
"transport type": "LOCAL",
"size": null
}
}
]
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long