!11312 profiler: add memory parser to parse memory usage info

From: @zhangyunshu
Reviewed-by: 
Signed-off-by:
mindspore-ci-bot 2021-01-20 16:22:15 +08:00 committed by Gitee
commit 7a660ca62b
6 changed files with 874 additions and 3 deletions

mindspore/profiler/common/proto_files/__init__.py

@@ -0,0 +1,15 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The proto files for profiler."""

mindspore/profiler/common/proto_files/memory_usage.proto

@@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
syntax = "proto3";
package profiler;
message MemoryProto {
repeated GraphMemProto graph_mem = 1; // memory usage of multiple graphs
int64 total_mem = 2; // total allocated memory on device
}
message GraphMemProto {
int64 graph_id = 1; // graph id
int64 static_mem = 2; // size of allocated static memory for current graph
repeated NodeMemProto node_mems = 3; // execution nodes
repeated TensorMemProto tensor_mems = 4; // all tensors
string fp_start = 5; // node name of fp start
string bp_end = 6; // node name of bp end
}
message NodeMemProto {
string node_name = 1; // node name
int64 node_id = 2; // node id with respect to the execution order
repeated int64 input_tensor_id = 3; // input tensor id
repeated int64 output_tensor_id = 4; // output tensor id
repeated int64 workspace_tensor_id = 5; // workspace tensor id
}
message TensorMemProto {
int64 tensor_id = 1; // tensor id
int64 size = 2; // aligned tensor size
string type = 3; // tensor type, e.g. Common, OutputOnly
int64 life_start = 4; // node id at which memory allocated
int64 life_end = 5; // node id at which memory deallocated
string life_long = 6; // the type of tensor lifetime, e.g. LifeLongGraphAll
}
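For reference, a minimal sketch (not part of this commit) of reading a serialized MemoryProto with the generated Python bindings below; the file name memory_usage_0.pb is hypothetical, following the memory_usage_{device_id}.pb pattern used by the parser in this commit:

from mindspore.profiler.common.proto_files.memory_usage_pb2 import MemoryProto

GIGABYTES = 1024 * 1024 * 1024

# Parse the serialized proto and print a per-graph overview.
with open('memory_usage_0.pb', 'rb') as f:
    memory_proto = MemoryProto()
    memory_proto.ParseFromString(f.read())

print('total allocated device memory: %.3f GB' % (memory_proto.total_mem / GIGABYTES))
for graph_mem in memory_proto.graph_mem:
    print('graph %d: static_mem %.3f GB, %d nodes, %d tensors' % (
        graph_mem.graph_id, graph_mem.static_mem / GIGABYTES,
        len(graph_mem.node_mems), len(graph_mem.tensor_mems)))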

mindspore/profiler/common/proto_files/memory_usage_pb2.py

@@ -0,0 +1,295 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: mindspore/profiler/common/proto_files/memory_usage.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='mindspore/profiler/common/proto_files/memory_usage.proto',
package='profiler',
syntax='proto3',
serialized_options=None,
serialized_pb=_b('\n8mindspore/profiler/common/proto_files/memory_usage.proto\x12\x08profiler\"L\n\x0bMemoryProto\x12*\n\tgraph_mem\x18\x01 \x03(\x0b\x32\x17.profiler.GraphMemProto\x12\x11\n\ttotal_mem\x18\x02 \x01(\x03\"\xb1\x01\n\rGraphMemProto\x12\x10\n\x08graph_id\x18\x01 \x01(\x03\x12\x12\n\nstatic_mem\x18\x02 \x01(\x03\x12)\n\tnode_mems\x18\x03 \x03(\x0b\x32\x16.profiler.NodeMemProto\x12-\n\x0btensor_mems\x18\x04 \x03(\x0b\x32\x18.profiler.TensorMemProto\x12\x10\n\x08\x66p_start\x18\x05 \x01(\t\x12\x0e\n\x06\x62p_end\x18\x06 \x01(\t\"\x82\x01\n\x0cNodeMemProto\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\x03\x12\x17\n\x0finput_tensor_id\x18\x03 \x03(\x03\x12\x18\n\x10output_tensor_id\x18\x04 \x03(\x03\x12\x1b\n\x13workspace_tensor_id\x18\x05 \x03(\x03\"x\n\x0eTensorMemProto\x12\x11\n\ttensor_id\x18\x01 \x01(\x03\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x12\n\nlife_start\x18\x04 \x01(\x03\x12\x10\n\x08life_end\x18\x05 \x01(\x03\x12\x11\n\tlife_long\x18\x06 \x01(\tb\x06proto3')
)
_MEMORYPROTO = _descriptor.Descriptor(
name='MemoryProto',
full_name='profiler.MemoryProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='graph_mem', full_name='profiler.MemoryProto.graph_mem', index=0,
number=1, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='total_mem', full_name='profiler.MemoryProto.total_mem', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=70,
serialized_end=146,
)
_GRAPHMEMPROTO = _descriptor.Descriptor(
name='GraphMemProto',
full_name='profiler.GraphMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='graph_id', full_name='profiler.GraphMemProto.graph_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='static_mem', full_name='profiler.GraphMemProto.static_mem', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='node_mems', full_name='profiler.GraphMemProto.node_mems', index=2,
number=3, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='tensor_mems', full_name='profiler.GraphMemProto.tensor_mems', index=3,
number=4, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='fp_start', full_name='profiler.GraphMemProto.fp_start', index=4,
number=5, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='bp_end', full_name='profiler.GraphMemProto.bp_end', index=5,
number=6, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=149,
serialized_end=326,
)
_NODEMEMPROTO = _descriptor.Descriptor(
name='NodeMemProto',
full_name='profiler.NodeMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='node_name', full_name='profiler.NodeMemProto.node_name', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='node_id', full_name='profiler.NodeMemProto.node_id', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='input_tensor_id', full_name='profiler.NodeMemProto.input_tensor_id', index=2,
number=3, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='output_tensor_id', full_name='profiler.NodeMemProto.output_tensor_id', index=3,
number=4, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='workspace_tensor_id', full_name='profiler.NodeMemProto.workspace_tensor_id', index=4,
number=5, type=3, cpp_type=2, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=329,
serialized_end=459,
)
_TENSORMEMPROTO = _descriptor.Descriptor(
name='TensorMemProto',
full_name='profiler.TensorMemProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='tensor_id', full_name='profiler.TensorMemProto.tensor_id', index=0,
number=1, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='size', full_name='profiler.TensorMemProto.size', index=1,
number=2, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='type', full_name='profiler.TensorMemProto.type', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_start', full_name='profiler.TensorMemProto.life_start', index=3,
number=4, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_end', full_name='profiler.TensorMemProto.life_end', index=4,
number=5, type=3, cpp_type=2, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='life_long', full_name='profiler.TensorMemProto.life_long', index=5,
number=6, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=461,
serialized_end=581,
)
_MEMORYPROTO.fields_by_name['graph_mem'].message_type = _GRAPHMEMPROTO
_GRAPHMEMPROTO.fields_by_name['node_mems'].message_type = _NODEMEMPROTO
_GRAPHMEMPROTO.fields_by_name['tensor_mems'].message_type = _TENSORMEMPROTO
DESCRIPTOR.message_types_by_name['MemoryProto'] = _MEMORYPROTO
DESCRIPTOR.message_types_by_name['GraphMemProto'] = _GRAPHMEMPROTO
DESCRIPTOR.message_types_by_name['NodeMemProto'] = _NODEMEMPROTO
DESCRIPTOR.message_types_by_name['TensorMemProto'] = _TENSORMEMPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
MemoryProto = _reflection.GeneratedProtocolMessageType('MemoryProto', (_message.Message,), {
'DESCRIPTOR' : _MEMORYPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.MemoryProto)
})
_sym_db.RegisterMessage(MemoryProto)
GraphMemProto = _reflection.GeneratedProtocolMessageType('GraphMemProto', (_message.Message,), {
'DESCRIPTOR' : _GRAPHMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.GraphMemProto)
})
_sym_db.RegisterMessage(GraphMemProto)
NodeMemProto = _reflection.GeneratedProtocolMessageType('NodeMemProto', (_message.Message,), {
'DESCRIPTOR' : _NODEMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.NodeMemProto)
})
_sym_db.RegisterMessage(NodeMemProto)
TensorMemProto = _reflection.GeneratedProtocolMessageType('TensorMemProto', (_message.Message,), {
'DESCRIPTOR' : _TENSORMEMPROTO,
'__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
# @@protoc_insertion_point(class_scope:profiler.TensorMemProto)
})
_sym_db.RegisterMessage(TensorMemProto)
# @@protoc_insertion_point(module_scope)

mindspore/profiler/parser/container.py

@@ -1,4 +1,4 @@
# Copyright 2020 Huawei Technologies Co., Ltd
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,6 +13,9 @@
# limitations under the License.
# ============================================================================
"""The container of metadata used in profiler parser."""
import heapq
GIGABYTES = 1024 * 1024 * 1024
class HWTSContainer:
@@ -111,3 +114,138 @@ class TimelineContainer:
def pid(self):
"""Get the pid of the operator execution."""
return self._pid
class MemoryGraph:
"""
    A container for a graph.

    Args:
graph_proto (proto): Graph proto, defined in profiler module.
"""
def __init__(self, graph_proto):
self._graph_proto = graph_proto
self.graph_id = graph_proto.graph_id
self.static_mem = graph_proto.static_mem / GIGABYTES
self.fp_start = None
self.bp_end = None
self.lines = []
self.nodes = {}
def to_dict(self):
"""Convert Graph to dict."""
graph = {
'graph_id': self.graph_id,
'static_mem': self.static_mem,
'nodes': self.nodes,
'fp_start': self.fp_start,
'bp_end': self.bp_end,
'lines': self.lines
}
return graph
class MemoryNode:
"""
    A container for a node.

    Args:
node_proto (proto): Node proto.
graph_id (int): Graph id.
"""
def __init__(self, node_proto, graph_id):
self._node_proto = node_proto
self.graph_id = graph_id
self.node_id = node_proto.node_id
self.name = node_proto.node_name
self.fullname = ""
self.input_ids = [t_id for t_id in node_proto.input_tensor_id]
self.output_ids = [t_id for t_id in node_proto.output_tensor_id]
self.workspace_ids = [t_id for t_id in node_proto.workspace_tensor_id]
self.inputs = []
self.outputs = []
self.workspaces = []
self.allocations = 0
self.deallocations = 0
self.size = 0
self.mem_change = 0
def to_dict(self):
"""Convert Node to dict."""
node = {
'name': self.name,
'fullname': self.fullname,
'node_id': self.node_id,
'allocations': self.allocations,
'size': self.size,
'allocated': self.mem_change,
'inputs': self.inputs,
'outputs': self.outputs,
'workspaces': self.workspaces
}
return node
class MemoryTensor:
"""
    A container for a tensor.

    Args:
tensor_proto (proto): Tensor proto.
graph_id (int): Graph id.
"""
def __init__(self, tensor_proto, graph_id):
self._tensor_proto = tensor_proto
self.tensor_id = tensor_proto.tensor_id
self.life_long = tensor_proto.life_long
self.life_start = tensor_proto.life_start
self.life_end = tensor_proto.life_end
self.size = tensor_proto.size / GIGABYTES
self.type = tensor_proto.type
self.graph_id = graph_id
def to_dict(self):
"""Convert Tensor to a dict."""
tensor = {
'tensor_id': self.tensor_id,
'size': self.size,
'type': self.type,
'life_long': self.life_long,
'life_start': self.life_start,
'life_end': self.life_end
}
return tensor
class MemoryQueue:
"""
    A priority queue that keeps a specified number of active nodes in memory activities.

    Args:
size (int): The upper limit of nodes to be saved.
"""
def __init__(self, size):
self._queue = []
self._index = 0
self._size = size
def push(self, item, priority):
"""
        Push a node into MemoryQueue.

        Args:
item (tuple): Node item including id, name, etc.
priority (int): The priority of the item.
"""
if self._index < self._size:
heapq.heappush(self._queue, (-priority, item))
self._index += 1
else:
heapq.heappushpop(self._queue, (-priority, item))
def get_items(self):
"""Get the elements in MemoryQueue."""
return self._queue
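A short usage sketch (hypothetical values, not part of this commit): the parser below pushes nodes with priority=-node.size, so items land on the heap as (size, item) pairs and heappushpop evicts the smallest size once the limit is reached, keeping only the largest nodes.

from mindspore.profiler.parser.container import MemoryQueue

queue = MemoryQueue(size=2)
for name, node_id, size in [('Conv2D', 0, 1.5), ('MatMul', 1, 0.5), ('BatchNorm', 2, 2.0)]:
    queue.push(item=(name, node_id, size), priority=-size)

# Only the two largest nodes survive the size limit.
print(queue.get_items())  # [(1.5, ('Conv2D', 0, 1.5)), (2.0, ('BatchNorm', 2, 2.0))]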

mindspore/profiler/parser/memory_usage_parser.py

@@ -0,0 +1,355 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Memory Usage Parser."""
import json
import os
import stat
from google.protobuf.text_format import ParseError
from mindspore import log as logger
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
ProfilerFileNotFoundException, ProfilerRawFileException
from mindspore.profiler.common.proto_files.memory_usage_pb2 import MemoryProto
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
from mindspore.profiler.parser.container import MemoryGraph as Graph
from mindspore.profiler.parser.container import MemoryNode as Node
from mindspore.profiler.parser.container import MemoryQueue
from mindspore.profiler.parser.container import MemoryTensor as Tensor
GIGABYTES = 1024 * 1024 * 1024
class MemoryUsageParser:
"""MemoryUsageParser to parse memory raw data."""
def __init__(self, profiling_dir, device_id):
self._profiling_dir = profiling_dir
self._device_id = device_id
self._proto_file_path = 'memory_usage_{}.pb'
self._summary_filename = 'memory_usage_summary_{}.json'
self._details_filename = 'memory_usage_details_{}.json'
self._graphs_dict = {}
self._peak_mem = 0
self._mem_summary = {
'capacity': 0,
'allocations': 0,
'deallocations': 0,
'peak_mem': 0,
'static_mem': 0,
'breakdowns': []
}
self._active_nodes = MemoryQueue(size=10)
self._framework = {}
def _get_file_path(self):
"""Get the proto file path."""
file_path = os.path.join(
self._profiling_dir,
self._proto_file_path.format(self._device_id)
)
file_path = validate_and_normalize_path(file_path)
if not os.path.exists(file_path):
msg = 'The memory file does not exist!'
logger.error(msg)
raise ProfilerFileNotFoundException(msg=msg)
return file_path
def init_memory_usage_info(self, aicore_detail_data, points):
"""Init memory usage information."""
logger.info("Start to load memory usage data from pb file")
file_path = self._get_file_path()
self._framework = self._process_framework_info(aicore_detail_data)
try:
with open(file_path, 'rb') as f:
content = f.read()
except (IOError, OSError) as err:
logger.error('Failed to read memory file: %s', err)
raise ProfilerIOException
model_proto = MemoryProto()
try:
model_proto.ParseFromString(content)
except ParseError as err:
msg = "Fail to parse memory proto file."
logger.error("Cannot parse the memory file. Please check the file schema.\n%s", err)
raise ProfilerRawFileException(msg)
graphs = model_proto.graph_mem
self._graphs_dict = self._parse_graphs(graphs, points)
self._mem_summary['capacity'] = model_proto.total_mem / GIGABYTES
self._mem_summary['peak_mem'] = self._peak_mem
self._process_memory_breakdowns()
logger.info('Finished processing memory usage data.')
def _parse_graphs(self, graphs, points):
"""Parse subgraphs."""
graphs_dict = {}
for graph_proto in graphs:
graph_id = graph_proto.graph_id
if graph_id is None:
                logger.info('Graph id is missing, skipping the graph.')
continue
graph = Graph(graph_proto)
# process tensors in the graph
tensors_proto = graph_proto.tensor_mems
if not tensors_proto:
                logger.info('No tensors in graph %s, skipping.', graph_id)
continue
tensors_dict = self._parse_tensors(tensors_proto, graph_id)
            # calculate the graph's memory usage from the node count and tensor details
nodes_proto = graph_proto.node_mems
# init memory usage list with static memory
mem_change = [graph.static_mem for _ in range(len(nodes_proto))]
self._calc_mem_change(mem_change, tensors_dict)
graph.lines = mem_change
# process nodes in graph
graph.nodes = self._parse_nodes(
nodes_proto, mem_change, tensors_dict, graph
)
# update fp_start and bp_end
point_id = self._locate_fp_bp_id(points, graph.nodes)
graph.fp_start = point_id.get('fp_start')
graph.bp_end = point_id.get('bp_end')
graphs_dict.update({graph_id: graph.to_dict()})
self._mem_summary['static_mem'] += graph.static_mem
            # each tensor is allocated and deallocated once; +1 accounts for the static memory
            self._mem_summary['allocations'] += len(tensors_dict) + 1
            self._mem_summary['deallocations'] += len(tensors_dict) + 1
self._peak_mem = max(max(mem_change), self._peak_mem)
return graphs_dict
@staticmethod
def _parse_tensors(tensors_proto, graph_id):
"""Parse tensors."""
tensors_dict = {}
for tensor_proto in tensors_proto:
tensor = Tensor(tensor_proto, graph_id)
tensors_dict.update({tensor.tensor_id: tensor})
return tensors_dict
def _parse_nodes(self, nodes_proto, mem_change, tensors_dict, graph):
"""Parse nodes."""
nodes_dict = {}
for index, node_proto in enumerate(nodes_proto):
node = Node(node_proto, graph.graph_id)
tensors = set(node.output_ids + node.workspace_ids)
node.size = self._calc_node_memory(tensors, tensors_dict)
node.allocations = len(tensors)
node.deallocations = len(tensors)
# calculate the allocated/deallocated memory size on the node
if index == 0:
node.mem_change = mem_change[index] - graph.static_mem
else:
node.mem_change = mem_change[index] - mem_change[index-1]
self._update_nodes(node, tensors_dict)
nodes_dict[node.name] = node.to_dict()
# update active nodes
self._active_nodes.push(
item=(node.name, node.node_id, node.size, graph.graph_id),
priority=-node.size # priority is the negative value of node size
)
return nodes_dict
def _update_nodes(self, node, tensors_dict):
"""Update nodes."""
skipped = self._find_conflict_tensors(node)
name = node.name
if self._framework and name in self._framework:
node_frame = self._framework[name]
node.fullname = node_frame.get('fullname')
info = node_frame.get('args')
for key, value in info.items():
if 'input' in key:
node.inputs.append(value)
else:
node.outputs.append(value)
node.inputs = self._fill_tensor_dict(
node.inputs, node.input_ids, tensors_dict, 'input'
)
node.outputs = self._fill_tensor_dict(
node.outputs, node.output_ids, tensors_dict, 'output'
)
node.workspaces = self._fill_tensor_dict(
node.workspaces, node.workspace_ids, tensors_dict, 'workspace', skipped
)
@staticmethod
def _find_conflict_tensors(node):
"""Find conflict tensors in node."""
output_list = []
if node.output_ids:
output_list = node.output_ids
skipped = []
if node.workspace_ids:
for t_id in node.workspace_ids:
if t_id in output_list:
skipped.append(t_id)
return skipped
@staticmethod
def _fill_tensor_dict(node_ios, tensor_ids, tensors_dict, tensor_type, skipped=None):
"""Fill tensor dict."""
full_list = []
for t_id, io_dict in zip(tensor_ids, node_ios):
if tensor_type == 'workspace' and t_id in skipped:
continue
tensor = tensors_dict.get(t_id)
tensor.type = tensor_type
io_dict.update(tensor.to_dict())
full_list.append(io_dict)
return full_list
@staticmethod
def _calc_node_memory(tensors, tensors_dict):
"""Calculate the allocated memory for the node."""
node_mem = 0
for t_id in tensors:
tensor = tensors_dict[t_id]
size = tensor.size
node_mem += size
return node_mem
def _calc_mem_change(self, mem_change, tensors_dict):
"""Calculate the memory change for the subgraph."""
node_num = len(mem_change)
for tensor_id, tensor in tensors_dict.items():
life_long = tensor.life_long
life_start = tensor.life_start
life_end = tensor.life_end
size = tensor.size
            # Update the memory change for the entire graph.
            # If a tensor's lifetime cannot be fully located, it is ignored (zero change).
if life_long == 'LifeLongGraphAll': # lifetime is from graph start to graph end
tensor.life_start = 0
tensor.life_end = node_num
self._update_mem_change(mem_change, size, 0, node_num)
elif life_long == 'LifeLongGraphStart': # lifetime is from graph start to tensor end
if life_end is not None and life_end >= 0:
tensor.life_start = 0
self._update_mem_change(mem_change, size, 0, life_end+1)
else:
logger.info('Cannot locate lifetime end for tensor: %s', tensor_id)
elif life_long == 'LifeLongGraphEnd': # lifetime is from tensor start to graph end
if life_start is not None and life_start <= node_num:
tensor.life_end = node_num
self._update_mem_change(mem_change, size, life_start, node_num)
else:
logger.info('Cannot locate lifetime start for tensor: %s', tensor_id)
elif life_long == 'LifeLongNone': # lifetime is from tensor start to tensor end
if life_start is not None and life_end is not None and life_start <= life_end:
self._update_mem_change(mem_change, size, life_start, life_end+1)
else:
logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id)
@staticmethod
def _update_mem_change(mem_change, size, start, end):
"""Update memory change for the subgraph."""
for i in range(start, end):
mem_change[i] += size
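To illustrate _calc_mem_change and _update_mem_change with concrete numbers (an illustrative sketch, not part of this commit): starting from a per-node line pre-filled with the graph's static memory, every tensor whose lifetime can be located adds its size to each step from life_start through life_end, and peak_mem is the maximum of the resulting line.

static_mem = 0.5                       # GB, pre-filled for every node
mem_change = [static_mem] * 3          # a graph with three execution nodes
tensors = [(1.0, 0, 1), (0.25, 1, 2)]  # (size GB, life_start, life_end), LifeLongNone
for size, start, end in tensors:
    for i in range(start, end + 1):    # end + 1 is the exclusive bound used above
        mem_change[i] += size

print(mem_change)       # [1.5, 1.75, 0.75]
print(max(mem_change))  # 1.75, which feeds into peak_mem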
@staticmethod
def _locate_fp_bp_id(points, nodes):
"""Locate the node id of fp_start and bp_end in graph."""
point_id = {
'fp_start': None,
'bp_end': None
}
fp_start = points.get('fp_start')
bp_end = points.get('bp_end')
fp_name = fp_start.split('/')[-1] if fp_start else ""
bp_name = bp_end.split('/')[-1] if bp_end else ""
if fp_name in nodes:
point_id['fp_start'] = nodes[fp_name].get('node_id')
if bp_name in nodes:
point_id['bp_end'] = nodes[bp_name].get('node_id')
return point_id
def _write_memory_files(self, filename, content):
"""Write the summary and top breakdowns of memory usage."""
file_path = os.path.join(self._profiling_dir, filename)
file_path = validate_and_normalize_path(file_path)
try:
with open(file_path, 'w') as json_file:
json.dump(content, json_file)
os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
except (IOError, OSError) as err:
            logger.error('Failed to write memory file.\n%s', err)
raise ProfilerIOException
def write_memory_files(self):
"""Write memory files."""
logger.info('Start recording memory data into files...')
# write memory summary to json file
summary = self._summary_filename.format(self._device_id)
self._write_memory_files(summary, self._mem_summary)
# write memory details to json file
details = self._details_filename.format(self._device_id)
self._write_memory_files(details, self._graphs_dict)
        logger.info('Successfully wrote memory data into files.')
def _process_memory_breakdowns(self):
"""Process memory breakdowns."""
breakdowns = []
active_nodes = self._active_nodes.get_items()
for _, node_meta in active_nodes:
node_name, _, _, graph_id = node_meta
graph = self._graphs_dict[graph_id]
nodes_dict = graph.get('nodes')
node = nodes_dict.get(node_name)
if 'inputs' in node:
node.pop('inputs')
breakdowns.append(node)
self._mem_summary['breakdowns'] = breakdowns
@staticmethod
def _process_framework_info(aicore_detail_data):
"""Process framework info."""
framework_info_dict = {}
for framework_obj in aicore_detail_data:
op_name = framework_obj[0]
op_full_name = framework_obj[4]
op_info = framework_obj[5]
framework_info_dict[op_name] = {
'fullname': op_full_name,
'name': op_name,
'args': op_info
}
return framework_info_dict
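A sketch of the expected input (the row content is hypothetical; only indices 0, 4 and 5 are read above): each aicore detail row is an indexable sequence carrying the op name, the full name, and an args dict whose 'input*' keys become node inputs and whose remaining keys become node outputs.

# hypothetical aicore detail row
row = ['Conv2D-op1', 'Default', 'Conv2D', 1.5,
       'Default/network/Conv2D-op1',
       {'input_0': {'format': 'NC1HWC0'}, 'output_0': {'format': 'NC1HWC0'}}]
framework = MemoryUsageParser._process_framework_info([row])
print(framework['Conv2D-op1']['fullname'])  # Default/network/Conv2D-op1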

mindspore/profiler/profiling.py

@@ -22,7 +22,7 @@ from enum import Enum
from mindspore import log as logger, context
from mindspore.communication.management import release, get_rank
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
ProfilerIOException, ProfilerException
ProfilerIOException, ProfilerException, ProfilerRawFileException
from mindspore.profiler.common.util import get_file_names, fwrite_format
from mindspore.profiler.common.validator.validate_path import \
validate_and_normalize_path
@@ -31,6 +31,7 @@ from mindspore.profiler.parser.framework_parser import FrameworkParser
from mindspore.profiler.parser.hwts_log_parser import HWTSLogParser
from mindspore.profiler.parser.integrator import Integrator
from mindspore.profiler.parser.integrator import GpuTimelineGenerator, AscendTimelineGenerator
from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
from mindspore.profiler.parser.minddata_parser import MinddataParser
from mindspore.profiler.parser.minddata_pipeline_parser import \
MinddataPipelineParser
@@ -249,8 +250,9 @@ class Profiler:
logger.warning(err.message)
# analyse step trace info
points = None
try:
self._analyse_step_trace(source_path, framework_parser)
points = self._analyse_step_trace(source_path, framework_parser)
except ProfilerException as err:
logger.warning(err.message)
@@ -260,6 +262,12 @@
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
logger.warning('Fail to write timeline data: %s', err)
# analyse memory usage info
try:
self._analyse_memory_usage(points)
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
logger.warning(err.message)
os.environ['PROFILING_MODE'] = str("false")
context.set_context(enable_profiling=False)
@@ -316,6 +324,8 @@
logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
logger.info("The point info is: %s", point_info)
return point_info
def _analyse_timeline(self, aicpu_parser, optime_parser):
"""
Analyse and parse timeline info.
@@ -366,6 +376,14 @@
logger.warning('Fail to write timeline data: %s', err)
raise RuntimeError('Fail to write timeline data.')
def _analyse_memory_usage(self, points):
"""Analyse memory usage data."""
integrator = Integrator(self._output_path, self._dev_id)
aicore_detail_data = integrator.get_aicore_detail_data()
memory_parser = MemoryUsageParser(self._output_path, self._dev_id)
memory_parser.init_memory_usage_info(aicore_detail_data, points)
memory_parser.write_memory_files()
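Once the analysis succeeds, the results land in two JSON files next to the other profiler output; a minimal sketch of reading them back (assuming device id 0 and a hypothetical output directory):

import json
import os

output_path = '/path/to/profiler_output'  # hypothetical
with open(os.path.join(output_path, 'memory_usage_summary_0.json')) as f:
    summary = json.load(f)
print('peak memory: %.3f GB of %.3f GB capacity'
      % (summary['peak_mem'], summary['capacity']))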
def _get_profiling_job_id(self):
"""Get profiling job id, which was generated by ada service.