forked from mindspore-Ecosystem/mindspore
!11312 profiler: add memory parser to parse memory usage info
From: @zhangyunshu
commit 7a660ca62b
mindspore/profiler/common/proto_files/__init__.py
@@ -0,0 +1,15 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""The proto files for profiler."""
mindspore/profiler/common/proto_files/memory_usage.proto
@@ -0,0 +1,50 @@
/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto3";

package profiler;

message MemoryProto {
  repeated GraphMemProto graph_mem = 1;  // memory usage of multiple graphs
  int64 total_mem = 2;                   // total allocated memory on device
}

message GraphMemProto {
  int64 graph_id = 1;                    // graph id
  int64 static_mem = 2;                  // size of allocated static memory for current graph
  repeated NodeMemProto node_mems = 3;   // execution nodes
  repeated TensorMemProto tensor_mems = 4;  // all tensors
  string fp_start = 5;                   // node name of fp start
  string bp_end = 6;                     // node name of bp end
}

message NodeMemProto {
  string node_name = 1;                  // node name
  int64 node_id = 2;                     // node id with respect to the execution order
  repeated int64 input_tensor_id = 3;    // input tensor id
  repeated int64 output_tensor_id = 4;   // output tensor id
  repeated int64 workspace_tensor_id = 5;  // workspace tensor id
}

message TensorMemProto {
  int64 tensor_id = 1;                   // tensor id
  int64 size = 2;                        // aligned tensor size
  string type = 3;                       // tensor type, e.g. Common, OutputOnly
  int64 life_start = 4;                  // node id at which memory allocated
  int64 life_end = 5;                    // node id at which memory deallocated
  string life_long = 6;                  // the type of tensor lifetime, e.g. LifeLongGraphAll
}
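A quick round-trip with the generated bindings shows how this schema is meant to be populated: repeated message fields grow via `add()`, repeated scalars via `append()`. A minimal sketch (field values and the node name are illustrative, not from the commit):

```python
# Minimal sketch: build a MemoryProto, serialize it, and parse it back.
# Assumes the generated module below is importable as memory_usage_pb2.
from mindspore.profiler.common.proto_files.memory_usage_pb2 import MemoryProto

proto = MemoryProto()
proto.total_mem = 32 * 1024 ** 3     # hypothetical total device memory, in bytes
graph = proto.graph_mem.add()        # repeated field: add one GraphMemProto
graph.graph_id = 0
graph.static_mem = 512 * 1024 ** 2
node = graph.node_mems.add()
node.node_name = 'Conv2D-op1'        # hypothetical node name
node.node_id = 0
node.output_tensor_id.append(0)      # repeated scalar field

data = proto.SerializeToString()
parsed = MemoryProto()
parsed.ParseFromString(data)
assert parsed.graph_mem[0].node_mems[0].node_name == 'Conv2D-op1'
```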
mindspore/profiler/common/proto_files/memory_usage_pb2.py
@@ -0,0 +1,295 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: mindspore/profiler/common/proto_files/memory_usage.proto

import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()




DESCRIPTOR = _descriptor.FileDescriptor(
  name='mindspore/profiler/common/proto_files/memory_usage.proto',
  package='profiler',
  syntax='proto3',
  serialized_options=None,
  serialized_pb=_b('\n8mindspore/profiler/common/proto_files/memory_usage.proto\x12\x08profiler\"L\n\x0bMemoryProto\x12*\n\tgraph_mem\x18\x01 \x03(\x0b\x32\x17.profiler.GraphMemProto\x12\x11\n\ttotal_mem\x18\x02 \x01(\x03\"\xb1\x01\n\rGraphMemProto\x12\x10\n\x08graph_id\x18\x01 \x01(\x03\x12\x12\n\nstatic_mem\x18\x02 \x01(\x03\x12)\n\tnode_mems\x18\x03 \x03(\x0b\x32\x16.profiler.NodeMemProto\x12-\n\x0btensor_mems\x18\x04 \x03(\x0b\x32\x18.profiler.TensorMemProto\x12\x10\n\x08\x66p_start\x18\x05 \x01(\t\x12\x0e\n\x06\x62p_end\x18\x06 \x01(\t\"\x82\x01\n\x0cNodeMemProto\x12\x11\n\tnode_name\x18\x01 \x01(\t\x12\x0f\n\x07node_id\x18\x02 \x01(\x03\x12\x17\n\x0finput_tensor_id\x18\x03 \x03(\x03\x12\x18\n\x10output_tensor_id\x18\x04 \x03(\x03\x12\x1b\n\x13workspace_tensor_id\x18\x05 \x03(\x03\"x\n\x0eTensorMemProto\x12\x11\n\ttensor_id\x18\x01 \x01(\x03\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x12\n\nlife_start\x18\x04 \x01(\x03\x12\x10\n\x08life_end\x18\x05 \x01(\x03\x12\x11\n\tlife_long\x18\x06 \x01(\tb\x06proto3')
)




_MEMORYPROTO = _descriptor.Descriptor(
  name='MemoryProto',
  full_name='profiler.MemoryProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='graph_mem', full_name='profiler.MemoryProto.graph_mem', index=0,
      number=1, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='total_mem', full_name='profiler.MemoryProto.total_mem', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=70,
  serialized_end=146,
)


_GRAPHMEMPROTO = _descriptor.Descriptor(
  name='GraphMemProto',
  full_name='profiler.GraphMemProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='graph_id', full_name='profiler.GraphMemProto.graph_id', index=0,
      number=1, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='static_mem', full_name='profiler.GraphMemProto.static_mem', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='node_mems', full_name='profiler.GraphMemProto.node_mems', index=2,
      number=3, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='tensor_mems', full_name='profiler.GraphMemProto.tensor_mems', index=3,
      number=4, type=11, cpp_type=10, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='fp_start', full_name='profiler.GraphMemProto.fp_start', index=4,
      number=5, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='bp_end', full_name='profiler.GraphMemProto.bp_end', index=5,
      number=6, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=149,
  serialized_end=326,
)


_NODEMEMPROTO = _descriptor.Descriptor(
  name='NodeMemProto',
  full_name='profiler.NodeMemProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='node_name', full_name='profiler.NodeMemProto.node_name', index=0,
      number=1, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='node_id', full_name='profiler.NodeMemProto.node_id', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='input_tensor_id', full_name='profiler.NodeMemProto.input_tensor_id', index=2,
      number=3, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='output_tensor_id', full_name='profiler.NodeMemProto.output_tensor_id', index=3,
      number=4, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='workspace_tensor_id', full_name='profiler.NodeMemProto.workspace_tensor_id', index=4,
      number=5, type=3, cpp_type=2, label=3,
      has_default_value=False, default_value=[],
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=329,
  serialized_end=459,
)


_TENSORMEMPROTO = _descriptor.Descriptor(
  name='TensorMemProto',
  full_name='profiler.TensorMemProto',
  filename=None,
  file=DESCRIPTOR,
  containing_type=None,
  fields=[
    _descriptor.FieldDescriptor(
      name='tensor_id', full_name='profiler.TensorMemProto.tensor_id', index=0,
      number=1, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='size', full_name='profiler.TensorMemProto.size', index=1,
      number=2, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='type', full_name='profiler.TensorMemProto.type', index=2,
      number=3, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='life_start', full_name='profiler.TensorMemProto.life_start', index=3,
      number=4, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='life_end', full_name='profiler.TensorMemProto.life_end', index=4,
      number=5, type=3, cpp_type=2, label=1,
      has_default_value=False, default_value=0,
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
    _descriptor.FieldDescriptor(
      name='life_long', full_name='profiler.TensorMemProto.life_long', index=5,
      number=6, type=9, cpp_type=9, label=1,
      has_default_value=False, default_value=_b("").decode('utf-8'),
      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      serialized_options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  nested_types=[],
  enum_types=[
  ],
  serialized_options=None,
  is_extendable=False,
  syntax='proto3',
  extension_ranges=[],
  oneofs=[
  ],
  serialized_start=461,
  serialized_end=581,
)

_MEMORYPROTO.fields_by_name['graph_mem'].message_type = _GRAPHMEMPROTO
_GRAPHMEMPROTO.fields_by_name['node_mems'].message_type = _NODEMEMPROTO
_GRAPHMEMPROTO.fields_by_name['tensor_mems'].message_type = _TENSORMEMPROTO
DESCRIPTOR.message_types_by_name['MemoryProto'] = _MEMORYPROTO
DESCRIPTOR.message_types_by_name['GraphMemProto'] = _GRAPHMEMPROTO
DESCRIPTOR.message_types_by_name['NodeMemProto'] = _NODEMEMPROTO
DESCRIPTOR.message_types_by_name['TensorMemProto'] = _TENSORMEMPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)

MemoryProto = _reflection.GeneratedProtocolMessageType('MemoryProto', (_message.Message,), {
  'DESCRIPTOR' : _MEMORYPROTO,
  '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
  # @@protoc_insertion_point(class_scope:profiler.MemoryProto)
  })
_sym_db.RegisterMessage(MemoryProto)

GraphMemProto = _reflection.GeneratedProtocolMessageType('GraphMemProto', (_message.Message,), {
  'DESCRIPTOR' : _GRAPHMEMPROTO,
  '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
  # @@protoc_insertion_point(class_scope:profiler.GraphMemProto)
  })
_sym_db.RegisterMessage(GraphMemProto)

NodeMemProto = _reflection.GeneratedProtocolMessageType('NodeMemProto', (_message.Message,), {
  'DESCRIPTOR' : _NODEMEMPROTO,
  '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
  # @@protoc_insertion_point(class_scope:profiler.NodeMemProto)
  })
_sym_db.RegisterMessage(NodeMemProto)

TensorMemProto = _reflection.GeneratedProtocolMessageType('TensorMemProto', (_message.Message,), {
  'DESCRIPTOR' : _TENSORMEMPROTO,
  '__module__' : 'mindspore.profiler.common.proto_files.memory_usage_pb2'
  # @@protoc_insertion_point(class_scope:profiler.TensorMemProto)
  })
_sym_db.RegisterMessage(TensorMemProto)


# @@protoc_insertion_point(module_scope)
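Because the generated module registers each class with the default symbol database (`_sym_db.RegisterMessage(...)` above), message types can also be looked up by their full proto name once the module has been imported. A small sketch, not part of the commit:

```python
from google.protobuf import symbol_database

# Importing the generated module has the side effect of registering
# 'profiler.MemoryProto' and friends with the default symbol database.
import mindspore.profiler.common.proto_files.memory_usage_pb2  # noqa: F401

MemoryProto = symbol_database.Default().GetSymbol('profiler.MemoryProto')
proto = MemoryProto(total_mem=0)  # same class as memory_usage_pb2.MemoryProto
```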
mindspore/profiler/parser/container.py
@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,6 +13,9 @@
# limitations under the License.
# ============================================================================
"""The container of metadata used in profiler parser."""
+import heapq
+
+GIGABYTES = 1024 * 1024 * 1024


class HWTSContainer:
@@ -111,3 +114,138 @@ class TimelineContainer:
    def pid(self):
        """Get the pid of the operator execution."""
        return self._pid


class MemoryGraph:
    """
    A container for a graph.

    Args:
        graph_proto (proto): Graph proto, defined in profiler module.
    """
    def __init__(self, graph_proto):
        self._graph_proto = graph_proto
        self.graph_id = graph_proto.graph_id
        self.static_mem = graph_proto.static_mem / GIGABYTES
        self.fp_start = None
        self.bp_end = None
        self.lines = []
        self.nodes = {}

    def to_dict(self):
        """Convert Graph to dict."""
        graph = {
            'graph_id': self.graph_id,
            'static_mem': self.static_mem,
            'nodes': self.nodes,
            'fp_start': self.fp_start,
            'bp_end': self.bp_end,
            'lines': self.lines
        }

        return graph


class MemoryNode:
    """
    A container for a node.

    Args:
        node_proto (proto): Node proto.
        graph_id (int): Graph id.
    """
    def __init__(self, node_proto, graph_id):
        self._node_proto = node_proto
        self.graph_id = graph_id
        self.node_id = node_proto.node_id
        self.name = node_proto.node_name
        self.fullname = ""
        self.input_ids = list(node_proto.input_tensor_id)
        self.output_ids = list(node_proto.output_tensor_id)
        self.workspace_ids = list(node_proto.workspace_tensor_id)
        self.inputs = []
        self.outputs = []
        self.workspaces = []
        self.allocations = 0
        self.deallocations = 0
        self.size = 0
        self.mem_change = 0

    def to_dict(self):
        """Convert Node to dict."""
        node = {
            'name': self.name,
            'fullname': self.fullname,
            'node_id': self.node_id,
            'allocations': self.allocations,
            'size': self.size,
            'allocated': self.mem_change,
            'inputs': self.inputs,
            'outputs': self.outputs,
            'workspaces': self.workspaces
        }

        return node


class MemoryTensor:
    """
    A container for a tensor.

    Args:
        tensor_proto (proto): Tensor proto.
        graph_id (int): Graph id.
    """
    def __init__(self, tensor_proto, graph_id):
        self._tensor_proto = tensor_proto
        self.tensor_id = tensor_proto.tensor_id
        self.life_long = tensor_proto.life_long
        self.life_start = tensor_proto.life_start
        self.life_end = tensor_proto.life_end
        self.size = tensor_proto.size / GIGABYTES
        self.type = tensor_proto.type
        self.graph_id = graph_id

    def to_dict(self):
        """Convert Tensor to a dict."""
        tensor = {
            'tensor_id': self.tensor_id,
            'size': self.size,
            'type': self.type,
            'life_long': self.life_long,
            'life_start': self.life_start,
            'life_end': self.life_end
        }

        return tensor


class MemoryQueue:
    """
    A bounded priority queue that keeps a specified number of the most active
    nodes in memory activities.

    Args:
        size (int): The upper limit of nodes to be saved.
    """
    def __init__(self, size):
        self._queue = []
        self._index = 0
        self._size = size

    def push(self, item, priority):
        """
        Push a node into the MemoryQueue.

        Args:
            item (tuple): Node item, including id, name, etc.
            priority (int): The priority of the item.
        """
        if self._index < self._size:
            heapq.heappush(self._queue, (-priority, item))
            self._index += 1
        else:
            # the queue is full: push the new entry and evict the entry
            # with the smallest key (-priority) in a single operation
            heapq.heappushpop(self._queue, (-priority, item))

    def get_items(self):
        """Get the elements in the MemoryQueue."""
        return self._queue
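`MemoryQueue` is effectively a bounded min-heap used to retain the top-N memory consumers: the parser passes `priority=-node.size`, `push` stores `(-priority, item)` — i.e. `(size, item)` — and once the queue is full, `heappushpop` evicts the entry with the smallest size. A toy run, assuming items are simple `(name, size)` pairs:

```python
from mindspore.profiler.parser.container import MemoryQueue

queue = MemoryQueue(size=2)
for name, size in [('A', 3), ('B', 10), ('C', 7)]:
    # mirror the parser's call: priority is the negative node size,
    # so the key stored by push, (-priority), is the size itself
    queue.push(item=(name, size), priority=-size)

# only the two largest entries survive; sort to list the biggest first
print(sorted(queue.get_items(), reverse=True))
# [(10, ('B', 10)), (7, ('C', 7))]
```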
mindspore/profiler/parser/memory_usage_parser.py
@@ -0,0 +1,355 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Memory Usage Parser."""
import json
import os
import stat

# ParseFromString raises message.DecodeError on malformed binary input
# (text_format.ParseError only applies to text-format protos).
from google.protobuf.message import DecodeError

from mindspore import log as logger
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, \
    ProfilerFileNotFoundException, ProfilerRawFileException
from mindspore.profiler.common.proto_files.memory_usage_pb2 import MemoryProto
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
from mindspore.profiler.parser.container import MemoryGraph as Graph
from mindspore.profiler.parser.container import MemoryNode as Node
from mindspore.profiler.parser.container import MemoryQueue
from mindspore.profiler.parser.container import MemoryTensor as Tensor

GIGABYTES = 1024 * 1024 * 1024


class MemoryUsageParser:
    """MemoryUsageParser to parse memory raw data."""
    def __init__(self, profiling_dir, device_id):
        self._profiling_dir = profiling_dir
        self._device_id = device_id
        self._proto_file_path = 'memory_usage_{}.pb'
        self._summary_filename = 'memory_usage_summary_{}.json'
        self._details_filename = 'memory_usage_details_{}.json'
        self._graphs_dict = {}
        self._peak_mem = 0
        self._mem_summary = {
            'capacity': 0,
            'allocations': 0,
            'deallocations': 0,
            'peak_mem': 0,
            'static_mem': 0,
            'breakdowns': []
        }
        self._active_nodes = MemoryQueue(size=10)
        self._framework = {}

    def _get_file_path(self):
        """Get the proto file path."""
        file_path = os.path.join(
            self._profiling_dir,
            self._proto_file_path.format(self._device_id)
        )
        file_path = validate_and_normalize_path(file_path)

        if not os.path.exists(file_path):
            msg = 'The memory file does not exist!'
            logger.error(msg)
            raise ProfilerFileNotFoundException(msg=msg)

        return file_path

    def init_memory_usage_info(self, aicore_detail_data, points):
        """Init memory usage information."""
        logger.info("Start to load memory usage data from pb file.")
        file_path = self._get_file_path()
        self._framework = self._process_framework_info(aicore_detail_data)

        try:
            with open(file_path, 'rb') as f:
                content = f.read()
        except (IOError, OSError) as err:
            logger.error('Failed to read memory file: %s', err)
            raise ProfilerIOException

        model_proto = MemoryProto()
        try:
            model_proto.ParseFromString(content)
        except DecodeError as err:
            msg = "Failed to parse memory proto file."
            logger.error("Cannot parse the memory file. Please check the file schema.\n%s", err)
            raise ProfilerRawFileException(msg)

        graphs = model_proto.graph_mem
        self._graphs_dict = self._parse_graphs(graphs, points)
        self._mem_summary['capacity'] = model_proto.total_mem / GIGABYTES
        self._mem_summary['peak_mem'] = self._peak_mem
        self._process_memory_breakdowns()

        logger.info('Finished processing memory usage data.')

    def _parse_graphs(self, graphs, points):
        """Parse subgraphs."""
        graphs_dict = {}
        for graph_proto in graphs:
            graph_id = graph_proto.graph_id
            if graph_id is None:
                logger.info('Graph id is missing, skipping the graph.')
                continue

            graph = Graph(graph_proto)

            # process tensors in the graph
            tensors_proto = graph_proto.tensor_mems
            if not tensors_proto:
                logger.info('No tensor in graph %s, skipping it.', graph_id)
                continue
            tensors_dict = self._parse_tensors(tensors_proto, graph_id)

            # calculate the graph's memory usage from its node count and tensor details
            nodes_proto = graph_proto.node_mems
            # init the memory usage list with the static memory
            mem_change = [graph.static_mem for _ in range(len(nodes_proto))]
            self._calc_mem_change(mem_change, tensors_dict)
            graph.lines = mem_change

            # process nodes in graph
            graph.nodes = self._parse_nodes(
                nodes_proto, mem_change, tensors_dict, graph
            )

            # update fp_start and bp_end
            point_id = self._locate_fp_bp_id(points, graph.nodes)
            graph.fp_start = point_id.get('fp_start')
            graph.bp_end = point_id.get('bp_end')

            graphs_dict.update({graph_id: graph.to_dict()})

            self._mem_summary['static_mem'] += graph.static_mem
            self._mem_summary['allocations'] += len(tensors_dict) + 1
            self._mem_summary['deallocations'] += len(tensors_dict) + 1
            self._peak_mem = max(max(mem_change), self._peak_mem)

        return graphs_dict

    @staticmethod
    def _parse_tensors(tensors_proto, graph_id):
        """Parse tensors."""
        tensors_dict = {}
        for tensor_proto in tensors_proto:
            tensor = Tensor(tensor_proto, graph_id)
            tensors_dict.update({tensor.tensor_id: tensor})

        return tensors_dict

    def _parse_nodes(self, nodes_proto, mem_change, tensors_dict, graph):
        """Parse nodes."""
        nodes_dict = {}
        for index, node_proto in enumerate(nodes_proto):
            node = Node(node_proto, graph.graph_id)
            tensors = set(node.output_ids + node.workspace_ids)
            node.size = self._calc_node_memory(tensors, tensors_dict)
            node.allocations = len(tensors)
            node.deallocations = len(tensors)

            # calculate the allocated/deallocated memory size on the node
            if index == 0:
                node.mem_change = mem_change[index] - graph.static_mem
            else:
                node.mem_change = mem_change[index] - mem_change[index-1]

            self._update_nodes(node, tensors_dict)
            nodes_dict[node.name] = node.to_dict()

            # update active nodes
            self._active_nodes.push(
                item=(node.name, node.node_id, node.size, graph.graph_id),
                priority=-node.size  # priority is the negative value of node size
            )

        return nodes_dict

    def _update_nodes(self, node, tensors_dict):
        """Update nodes."""
        skipped = self._find_conflict_tensors(node)
        name = node.name
        if self._framework and name in self._framework:
            node_frame = self._framework[name]
            node.fullname = node_frame.get('fullname')
            info = node_frame.get('args')
            for key, value in info.items():
                if 'input' in key:
                    node.inputs.append(value)
                else:
                    node.outputs.append(value)

        node.inputs = self._fill_tensor_dict(
            node.inputs, node.input_ids, tensors_dict, 'input'
        )
        node.outputs = self._fill_tensor_dict(
            node.outputs, node.output_ids, tensors_dict, 'output'
        )
        node.workspaces = self._fill_tensor_dict(
            node.workspaces, node.workspace_ids, tensors_dict, 'workspace', skipped
        )

    @staticmethod
    def _find_conflict_tensors(node):
        """Find conflict tensors in node."""
        output_list = []
        if node.output_ids:
            output_list = node.output_ids
        skipped = []
        if node.workspace_ids:
            for t_id in node.workspace_ids:
                if t_id in output_list:
                    skipped.append(t_id)

        return skipped

    @staticmethod
    def _fill_tensor_dict(node_ios, tensor_ids, tensors_dict, tensor_type, skipped=None):
        """Fill tensor dict."""
        full_list = []
        for t_id, io_dict in zip(tensor_ids, node_ios):
            if tensor_type == 'workspace' and t_id in skipped:
                continue
            tensor = tensors_dict.get(t_id)
            tensor.type = tensor_type
            io_dict.update(tensor.to_dict())
            full_list.append(io_dict)

        return full_list

    @staticmethod
    def _calc_node_memory(tensors, tensors_dict):
        """Calculate the allocated memory for the node."""
        node_mem = 0
        for t_id in tensors:
            tensor = tensors_dict[t_id]
            size = tensor.size
            node_mem += size

        return node_mem

    def _calc_mem_change(self, mem_change, tensors_dict):
        """Calculate the memory change for the subgraph."""
        node_num = len(mem_change)
        for tensor_id, tensor in tensors_dict.items():
            life_long = tensor.life_long
            life_start = tensor.life_start
            life_end = tensor.life_end
            size = tensor.size

            # Update memory change for the entire graph.
            # If a tensor's lifetime cannot be fully located, it is ignored as a 0 change.
            if life_long == 'LifeLongGraphAll':  # lifetime is from graph start to graph end
                tensor.life_start = 0
                tensor.life_end = node_num
                self._update_mem_change(mem_change, size, 0, node_num)
            elif life_long == 'LifeLongGraphStart':  # lifetime is from graph start to tensor end
                if life_end is not None and life_end >= 0:
                    tensor.life_start = 0
                    self._update_mem_change(mem_change, size, 0, life_end+1)
                else:
                    logger.info('Cannot locate lifetime end for tensor: %s', tensor_id)
            elif life_long == 'LifeLongGraphEnd':  # lifetime is from tensor start to graph end
                if life_start is not None and life_start <= node_num:
                    tensor.life_end = node_num
                    self._update_mem_change(mem_change, size, life_start, node_num)
                else:
                    logger.info('Cannot locate lifetime start for tensor: %s', tensor_id)
            elif life_long == 'LifeLongNone':  # lifetime is from tensor start to tensor end
                if life_start is not None and life_end is not None and life_start <= life_end:
                    self._update_mem_change(mem_change, size, life_start, life_end+1)
                else:
                    logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id)

    @staticmethod
    def _update_mem_change(mem_change, size, start, end):
        """Update memory change for the subgraph."""
        for i in range(start, end):
            mem_change[i] += size

    @staticmethod
    def _locate_fp_bp_id(points, nodes):
        """Locate the node ids of fp_start and bp_end in the graph."""
        point_id = {
            'fp_start': None,
            'bp_end': None
        }
        fp_start = points.get('fp_start')
        bp_end = points.get('bp_end')
        fp_name = fp_start.split('/')[-1] if fp_start else ""
        bp_name = bp_end.split('/')[-1] if bp_end else ""
        if fp_name in nodes:
            point_id['fp_start'] = nodes[fp_name].get('node_id')
        if bp_name in nodes:
            point_id['bp_end'] = nodes[bp_name].get('node_id')

        return point_id

    def _write_memory_files(self, filename, content):
        """Write the summary and top breakdowns of memory usage."""
        file_path = os.path.join(self._profiling_dir, filename)
        file_path = validate_and_normalize_path(file_path)

        try:
            with open(file_path, 'w') as json_file:
                json.dump(content, json_file)
            os.chmod(file_path, stat.S_IREAD | stat.S_IWRITE)
        except (IOError, OSError) as err:
            logger.error('Failed to write memory file.\n%s', err)
            raise ProfilerIOException

    def write_memory_files(self):
        """Write memory files."""
        logger.info('Start recording memory data into files...')
        # write memory summary to json file
        summary = self._summary_filename.format(self._device_id)
        self._write_memory_files(summary, self._mem_summary)

        # write memory details to json file
        details = self._details_filename.format(self._device_id)
        self._write_memory_files(details, self._graphs_dict)
        logger.info('Successfully wrote memory data into files.')

    def _process_memory_breakdowns(self):
        """Process memory breakdowns."""
        breakdowns = []
        active_nodes = self._active_nodes.get_items()
        for _, node_meta in active_nodes:
            node_name, _, _, graph_id = node_meta
            graph = self._graphs_dict[graph_id]
            nodes_dict = graph.get('nodes')
            node = nodes_dict.get(node_name)
            if 'inputs' in node:
                node.pop('inputs')
            breakdowns.append(node)

        self._mem_summary['breakdowns'] = breakdowns

    @staticmethod
    def _process_framework_info(aicore_detail_data):
        """Process framework info."""
        framework_info_dict = {}
        for framework_obj in aicore_detail_data:
            op_name = framework_obj[0]
            op_full_name = framework_obj[4]
            op_info = framework_obj[5]
            framework_info_dict[op_name] = {
                'fullname': op_full_name,
                'name': op_name,
                'args': op_info
            }

        return framework_info_dict
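The core of the parser is `_calc_mem_change`, which converts per-tensor lifetimes into a per-node memory curve: the curve starts at the graph's static memory, and each tensor adds its size to every execution step it is alive for; `_parse_nodes` then reports a node's delta as the difference between adjacent curve points. A self-contained toy reconstruction of that accumulation (plain tuples stand in for the `MemoryTensor` container; the numbers are illustrative):

```python
# Toy version of the lifetime accumulation in _calc_mem_change, with
# (life_start, life_end, size) tuples standing in for MemoryTensor objects.
static_mem = 1.0                       # GB, as in MemoryGraph.static_mem
node_num = 4
mem_change = [static_mem] * node_num   # one entry per execution step

tensors = [
    (0, 4, 0.5),    # LifeLongGraphAll: alive for the whole graph
    (1, 2, 0.25),   # LifeLongNone: alive on steps 1..2 (end inclusive)
]
for life_start, life_end, size in tensors:
    for step in range(life_start, min(life_end + 1, node_num)):
        mem_change[step] += size

print(mem_change)                      # [1.5, 1.75, 1.75, 1.5]
# _parse_nodes reports per-node deltas from adjacent points on this curve:
print(mem_change[1] - mem_change[0])   # 0.25 GB allocated on node 1
```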
mindspore/profiler/profiler.py
@@ -22,7 +22,7 @@ from enum import Enum
from mindspore import log as logger, context
from mindspore.communication.management import release, get_rank
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
-    ProfilerIOException, ProfilerException
+    ProfilerIOException, ProfilerException, ProfilerRawFileException
from mindspore.profiler.common.util import get_file_names, fwrite_format
from mindspore.profiler.common.validator.validate_path import \
    validate_and_normalize_path
@@ -31,6 +31,7 @@ from mindspore.profiler.parser.framework_parser import FrameworkParser
from mindspore.profiler.parser.hwts_log_parser import HWTSLogParser
from mindspore.profiler.parser.integrator import Integrator
from mindspore.profiler.parser.integrator import GpuTimelineGenerator, AscendTimelineGenerator
+from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
from mindspore.profiler.parser.minddata_parser import MinddataParser
from mindspore.profiler.parser.minddata_pipeline_parser import \
    MinddataPipelineParser
@@ -249,8 +250,9 @@ class Profiler:
            logger.warning(err.message)

        # analyse step trace info
+        points = None
        try:
-            self._analyse_step_trace(source_path, framework_parser)
+            points = self._analyse_step_trace(source_path, framework_parser)
        except ProfilerException as err:
            logger.warning(err.message)

@@ -260,6 +262,12 @@ class Profiler:
        except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
            logger.warning('Fail to write timeline data: %s', err)

+        # analyse memory usage info
+        try:
+            self._analyse_memory_usage(points)
+        except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
+            logger.warning(err.message)
+
        os.environ['PROFILING_MODE'] = str("false")
        context.set_context(enable_profiling=False)

@@ -316,6 +324,8 @@ class Profiler:
        logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
        logger.info("The point info is: %s", point_info)

+        return point_info
+
    def _analyse_timeline(self, aicpu_parser, optime_parser):
        """
        Analyse and parse timeline info.

@@ -366,6 +376,14 @@ class Profiler:
            logger.warning('Fail to write timeline data: %s', err)
            raise RuntimeError('Fail to write timeline data.')

+    def _analyse_memory_usage(self, points):
+        """Analyse memory usage data."""
+        integrator = Integrator(self._output_path, self._dev_id)
+        aicore_detail_data = integrator.get_aicore_detail_data()
+        memory_parser = MemoryUsageParser(self._output_path, self._dev_id)
+        memory_parser.init_memory_usage_info(aicore_detail_data, points)
+        memory_parser.write_memory_files()
+
    def _get_profiling_job_id(self):
        """Get profiling job id, which was generated by ada service.

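End to end, the feature is exercised through the public `Profiler` API: `analyse()` now also runs `_analyse_memory_usage`, which drops `memory_usage_summary_<device_id>.json` and `memory_usage_details_<device_id>.json` next to the other profiling results. A usage sketch (the output path and the training loop are placeholders):

```python
from mindspore.profiler import Profiler

profiler = Profiler(output_path='./profiler_data')  # starts collection
# ... define and run the network under profiling ...
profiler.analyse()
# ./profiler_data should now contain memory_usage_summary_<device_id>.json
# and memory_usage_details_<device_id>.json alongside the timeline files,
# provided memory_usage_<device_id>.pb was produced during the run.
```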