forked from mindspore-Ecosystem/mindspore
!9206 fix memory exceed bug in Robustness and ValueError in Occlusion.
From: @yuhanshi
Commit: ecc9f00c3c
@@ -22,15 +22,14 @@ from ..._utils import calc_correlation
 
 
 class ClassSensitivity(LabelAgnosticMetric):
-    r"""
+    """
     Class sensitivity metric used to evaluate attribution-based explanations.
 
     Reasonable atrribution-based explainers are expected to generate distinct saliency maps for different labels,
-    especially for labels of highest confidence and low confidence. Class sensitivity evaluates the explainer through
+    especially for labels of highest confidence and low confidence. ClassSensitivity evaluates the explainer through
     computing the correlation between saliency maps of highest-confidence and lowest-confidence labels. Explainer with
     better class sensitivity will receive lower correlation score. To make the evaluation results intuitive, the
     returned score will take negative on correlation and normalize.
     """
 
     def evaluate(self, explainer, inputs):
@@ -46,12 +45,18 @@ class ClassSensitivity(LabelAgnosticMetric):
 
         Examples:
             >>> import mindspore as ms
             >>> from mindspore.explainer.benchmark import ClassSensitivity
             >>> from mindspore.explainer.explanation import Gradient
-            >>> model = resnet(10)
-            >>> gradient = Gradient(model)
-            >>> x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
+            >>> from mindspore.train.serialization import load_checkpoint, load_param_into_net
+            >>> # prepare your network and load the trained checkpoint file, e.g., resnet50.
+            >>> network = resnet50(10)
+            >>> param_dict = load_checkpoint("resnet50.ckpt")
+            >>> load_param_into_net(network, param_dict)
+            >>> # prepare your explainer to be evaluated, e.g., Gradient.
+            >>> gradient = Gradient(network)
+            >>> input_x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
             >>> class_sensitivity = ClassSensitivity()
-            >>> res = class_sensitivity.evaluate(gradient, x)
+            >>> res = class_sensitivity.evaluate(gradient, input_x)
         """
         self._check_evaluate_param(explainer, inputs)
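Note: the metric described in the docstring above is easy to restate outside the framework. A minimal NumPy sketch of the idea, assuming a `(1 - r) / 2` normalization (the helper name and that exact normalization are illustrative assumptions, not the internals of `calc_correlation`):

import numpy as np

def class_sensitivity_score(saliency_max, saliency_min):
    """Sketch: normalized negative correlation between two saliency maps.

    saliency_max / saliency_min: saliency for the highest- and lowest-
    confidence labels of one sample. Returns a score in [0, 1]; a higher
    score means the explainer distinguishes the two labels better.
    """
    a = saliency_max.ravel().astype(np.float64)
    b = saliency_min.ravel().astype(np.float64)
    r = np.corrcoef(a, b)[0, 1]  # Pearson correlation, in [-1, 1]
    return (1.0 - r) / 2.0       # lower correlation -> higher score

# Two unrelated random maps should land near 0.5.
rng = np.random.default_rng(0)
print(class_sensitivity_score(rng.random((224, 224)), rng.random((224, 224))))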
@@ -32,6 +32,7 @@ class Robustness(LabelSensitiveMetric):
         num_labels (int): Number of classes in the dataset.
 
     Examples:
+        >>> # Initialize a Robustness benchmarker passing num_labels of the dataset.
        >>> from mindspore.explainer.benchmark import Robustness
         >>> num_labels = 100
         >>> robustness = Robustness(num_labels)
@@ -41,7 +42,7 @@ class Robustness(LabelSensitiveMetric):
         super().__init__(num_labels)
 
         self._perturb = RandomPerturb()
-        self._num_perturbations = 100  # number of perturbations used in evaluation
+        self._num_perturbations = 10  # number of perturbations used in evaluation
         self._threshold = 0.1  # threshold to generate perturbation
         self._activation_fn = activation_fn
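Note: the drop from 100 to 10 perturbations is the heart of the memory fix. The removed code in the `evaluate` hunk below stacked every perturbed copy of the whole batch before a single explainer call; the new code explains one round of perturbations at a time. A back-of-the-envelope under assumed input shapes:

# Rough peak-memory estimate for the removed stack-everything strategy,
# under assumed shapes (batch of 8 RGB 224x224 float32 inputs).
batch_size, channels, height, width = 8, 3, 224, 224
bytes_per_float32 = 4

def perturbation_buffer_mb(num_perturbations):
    count = batch_size * num_perturbations * channels * height * width
    return count * bytes_per_float32 / 2 ** 20

print(perturbation_buffer_mb(100))  # ~459 MB of perturbed inputs in one explainer call
print(perturbation_buffer_mb(10))   # ~46 MB, and the new code explains one round at a time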
@@ -68,12 +69,17 @@ class Robustness(LabelSensitiveMetric):
             ValueError: If batch_size is larger than 1.
 
         Examples:
-            >>> # init an explainer, the network should contain the output activation function.
             >>> from mindspore.explainer.explanation import Gradient
             >>> from mindspore.explainer.benchmark import Robustness
+            >>> from mindspore.train.serialization import load_checkpoint, load_param_into_net
+            >>> # prepare your network and load the trained checkpoint file, e.g., resnet50.
+            >>> network = resnet50(10)
+            >>> param_dict = load_checkpoint("resnet50.ckpt")
+            >>> load_param_into_net(network, param_dict)
+            >>> # prepare your explainer to be evaluated, e.g., Gradient.
             >>> gradient = Gradient(network)
             >>> input_x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
-            >>> target_label = 5
+            >>> target_label = ms.Tensor([0], ms.int32)
             >>> robustness = Robustness(num_labels=10)
             >>> res = robustness.evaluate(gradient, input_x, target_label)
         """
@@ -84,39 +90,48 @@ class Robustness(LabelSensitiveMetric):
 
         inputs_np = inputs.asnumpy()
         if isinstance(targets, int):
-            targets = ms.Tensor(targets, ms.int32)
+            targets = ms.Tensor([targets], ms.int32)
         if saliency is None:
             saliency = explainer(inputs, targets)
         saliency_np = saliency.asnumpy()
 
         norm = np.sqrt(np.sum(np.square(saliency_np), axis=tuple(range(1, len(saliency_np.shape)))))
-        if norm == 0:
+        if (norm == 0).any():
             log.warning('Get saliency norm equals 0, robustness return NaN for zero-norm saliency currently.')
-            return np.array([np.nan])
+            norm[norm == 0] = np.nan
 
-        perturbations = []
-        for sample in inputs_np:
-            sample = np.expand_dims(sample, axis=0)
-            perturbations_per_input = []
-            for _ in range(self._num_perturbations):
-                perturbation = self._perturb(sample)
-                perturbations_per_input.append(perturbation)
-            perturbations_per_input = np.vstack(perturbations_per_input)
-            perturbations.append(perturbations_per_input)
-        perturbations = np.stack(perturbations, axis=0)
-
-        perturbations = np.reshape(perturbations, (-1,) + inputs_np.shape[1:])
-        perturbations = ms.Tensor(perturbations, ms.float32)
-
-        repeated_targets = np.repeat(targets.asnumpy(), repeats=self._num_perturbations, axis=0)
-        repeated_targets = ms.Tensor(repeated_targets, ms.int32)
-        saliency_of_perturbations = explainer(perturbations, repeated_targets)
-        perturbations_saliency = saliency_of_perturbations.asnumpy()
-
-        repeated_saliency = np.repeat(saliency_np, repeats=self._num_perturbations, axis=0)
-
-        sensitivities = np.sum((repeated_saliency - perturbations_saliency) ** 2,
-                               axis=tuple(range(1, len(repeated_saliency.shape))))
-
-        max_sensitivity = np.max(sensitivities.reshape((norm.shape[0], -1)), axis=1) / norm
+        model = nn.SequentialCell([explainer.model, self._activation_fn])
+        original_outputs = model(inputs).asnumpy()
+        sensitivities = []
+        for _ in range(self._num_perturbations):
+            perturbations = []
+            for j, sample in enumerate(inputs_np):
+                perturbation_on_single_sample = self._perturb_with_threshold(model,
+                                                                             np.expand_dims(sample, axis=0),
+                                                                             original_outputs[j])
+                perturbations.append(perturbation_on_single_sample)
+            perturbations = np.vstack(perturbations)
+            perturbations_saliency = explainer(ms.Tensor(perturbations, ms.float32), targets).asnumpy()
+            sensitivity = np.sum((perturbations_saliency - saliency_np) ** 2,
+                                 axis=tuple(range(1, len(saliency_np.shape))))
+            sensitivities.append(sensitivity)
+        sensitivities = np.stack(sensitivities, axis=-1)
+        max_sensitivity = np.max(sensitivities, axis=1) / norm
         robustness_res = 1 / np.exp(max_sensitivity)
         return robustness_res
+
+    def _perturb_with_threshold(self, model: nn.Cell, sample: np.ndarray, original_output: np.ndarray) -> np.ndarray:
+        """
+        Generate the perturbation until the L2-distance between original_output and perturbation_output is lower than
+        the given self._threshold or until the attempt reaches the max_attempt_time.
+        """
+        # the maximum number of attempts to get a perturbation with perturb_error lower than self._threshold
+        max_attempt_time = 3
+        perturbation = None
+        for _ in range(max_attempt_time):
+            perturbation = self._perturb(sample)
+            perturbation_output = model(ms.Tensor(perturbation, ms.float32)).asnumpy()
+            perturb_error = np.linalg.norm(original_output - perturbation_output)
+            if perturb_error <= self._threshold:
+                return perturbation
+        return perturbation
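Note: per sample, the new loop boils down to exp(-max_i ||s(x) - s(x_i')||^2 / ||s(x)||) over the ten perturbed copies, where s(.) is the saliency map. A hedged NumPy restatement of that score, with shapes and the function boundary assumed for illustration only:

import numpy as np

def robustness_score(saliency, perturbed_saliencies):
    """Sketch of the per-sample score the new loop computes.

    saliency: saliency map of the original input; perturbed_saliencies:
    saliency maps recomputed on perturbed copies of the input.
    """
    norm = np.sqrt(np.sum(saliency ** 2))
    if norm == 0:
        return np.nan  # mirrors the zero-norm warning path above
    drifts = [np.sum((saliency - p) ** 2) for p in perturbed_saliencies]
    max_sensitivity = max(drifts) / norm
    return float(np.exp(-max_sensitivity))  # same as 1 / np.exp(...)

rng = np.random.default_rng(1)
s = rng.random((3, 224, 224))
perturbed = [s + 0.01 * rng.standard_normal(s.shape) for _ in range(10)]
print(robustness_score(s, perturbed))  # near 1.0: the explanation barely moved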
@@ -14,14 +14,11 @@
 # ============================================================================
 """Occlusion explainer."""
 
-import math
-
 import numpy as np
 from numpy.lib.stride_tricks import as_strided
 
 import mindspore as ms
 import mindspore.nn as nn
-from mindspore import Tensor
 from .ablation import Ablation
 from .perturbation import PerturbationAttribution
 from .replacement import Constant
@@ -62,8 +59,8 @@ class Occlusion(PerturbationAttribution):
         network (Cell): Specify the black-box model to be explained.
 
     Inputs:
-        inputs (Tensor): The input data to be explained, a 4D tensor of shape :math:`(N, C, H, W)`.
-        targets (Tensor, int): The label of interest. It should be a 1D or 0D tensor, or an integer.
+        - **inputs** (Tensor) - The input data to be explained, a 4D tensor of shape :math:`(N, C, H, W)`.
+        - **targets** (Tensor, int) - The label of interest. It should be a 1D or 0D tensor, or an integer.
           If it is a 1D tensor, its length should be the same as `inputs`.
 
     Outputs:
@@ -72,13 +69,15 @@ class Occlusion(PerturbationAttribution):
     Example:
         >>> from mindspore.explainer.explanation import Occlusion
+        >>> from mindspore.train.serialization import load_checkpoint, load_param_into_net
+        >>> # prepare your network and load the trained checkpoint file, e.g., resnet50.
+        >>> network = resnet50(10)
+        >>> param_dict = load_checkpoint("resnet50.ckpt")
+        >>> load_param_into_net(network, param_dict)
        >>> # initialize Occlusion explainer and pass the pretrained model
         >>> occlusion = Occlusion(network)
-        >>> x = Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
-        >>> label = 1
-        >>> saliency = occlusion(x, label)
+        >>> input_x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
+        >>> label = ms.Tensor([1], ms.int32)
+        >>> saliency = occlusion(input_x, label)
     """
 
     def __init__(self, network, activation_fn=nn.Softmax()):
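Note: for readers new to the method, occlusion attribution itself is a sliding-window loop: replace a window with a baseline value and credit the covered pixels with the drop in the target score. A minimal single-image sketch, assuming `forward` maps a (1, C, H, W) float32 batch to per-class scores and with window/stride/baseline as illustrative stand-ins for the adaptive values the class derives (the mask-based, chunked implementation in the next hunk is the real one):

import numpy as np

def occlusion_sketch(forward, image, target, window=16, stride=8, base=0.0):
    """Minimal single-image sketch of occlusion attribution."""
    _, _, height, width = image.shape
    original = forward(image)[0, target]
    attribution = np.zeros_like(image)
    counts = np.zeros_like(image)
    for top in range(0, height - window + 1, stride):
        for left in range(0, width - window + 1, stride):
            occluded = image.copy()
            occluded[0, :, top:top + window, left:left + window] = base
            drop = original - forward(occluded)[0, target]
            attribution[0, :, top:top + window, left:left + window] += drop
            counts[0, :, top:top + window, left:left + window] += 1
    return attribution / np.maximum(counts, 1)

# Toy check with a fake 3-class scorer.
fake_forward = lambda x: np.array([[x.mean(), 0.0, 1.0 - x.mean()]])
image = np.random.rand(1, 3, 64, 64).astype(np.float32)
print(occlusion_sketch(fake_forward, image, target=0).shape)  # (1, 3, 64, 64)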
@@ -88,62 +87,63 @@ class Occlusion(PerturbationAttribution):
         self._aggregation_fn = abs_max
         self._get_replacement = Constant(base_value=0.0)
         self._num_sample_per_dim = 32  # specify the number of perturbations each dimension.
-        self._num_per_eval = 32  # number of perturbations each evaluation step.
+        self._num_per_eval = 2  # number of perturbations generated for each sample per evaluation step.
 
     def __call__(self, inputs, targets):
         """Call function for 'Occlusion'."""
         self._verify_data(inputs, targets)
 
-        inputs = inputs.asnumpy()
-        targets = targets.asnumpy() if isinstance(targets, Tensor) else np.array([targets] * inputs.shape[0], np.int)
+        inputs_np = inputs.asnumpy()
+        targets_np = targets.asnumpy() if isinstance(targets, ms.Tensor) else np.array([targets], np.int)
 
-        # If spatial size of input data is smaller than self._num_sample_per_dim, window_size and strides will set to
-        # `(C, 3, 3)` and `(C, 1, 1)` separately.
-        window_size = tuple(
-            [inputs.shape[1]]
-            + [x % self._num_sample_per_dim if x > self._num_sample_per_dim else 3 for x in inputs.shape[2:]])
-        strides = tuple(
-            [inputs.shape[1]]
-            + [x // self._num_sample_per_dim if x > self._num_sample_per_dim else 1 for x in inputs.shape[2:]])
+        batch_size = inputs_np.shape[0]
+        window_size, strides = self._get_window_size_and_strides(inputs_np)
 
         model = nn.SequentialCell([self._model, self._activation_fn])
 
-        original_outputs = model(Tensor(inputs, ms.float32)).asnumpy()[np.arange(len(targets)), targets]
+        original_outputs = model(ms.Tensor(inputs, ms.float32)).asnumpy()[np.arange(batch_size), targets_np]
 
-        total_attribution = np.zeros_like(inputs)
-        weights = np.ones_like(inputs)
-        masks = Occlusion._generate_masks(inputs, window_size, strides)
+        total_attribution = np.zeros_like(inputs_np)
+        weights = np.ones_like(inputs_np)
+        masks = Occlusion._generate_masks(inputs_np, window_size, strides)
         num_perturbations = masks.shape[1]
-        original_outputs_repeat = np.repeat(original_outputs, repeats=num_perturbations, axis=0)
+        reference = self._get_replacement(inputs_np)
 
-        reference = self._get_replacement(inputs)
-        occluded_inputs = self._ablation(inputs, reference, masks)
-        targets_repeat = np.repeat(targets, repeats=num_perturbations, axis=0)
-
-        occluded_inputs = occluded_inputs.reshape((-1, *inputs.shape[1:]))
-        if occluded_inputs.shape[0] > self._num_per_eval:
-            cal_time = math.ceil(occluded_inputs.shape[0] / self._num_per_eval)
-            occluded_outputs = []
-            for i in range(cal_time):
-                occluded_input = occluded_inputs[i*self._num_per_eval
-                                                 :min((i+1) * self._num_per_eval, occluded_inputs.shape[0])]
-                target = targets_repeat[i*self._num_per_eval
-                                        :min((i+1) * self._num_per_eval, occluded_inputs.shape[0])]
-                occluded_output = model(Tensor(occluded_input)).asnumpy()[np.arange(target.shape[0]), target]
-                occluded_outputs.append(occluded_output)
-            occluded_outputs = np.concatenate(occluded_outputs)
-        else:
-            occluded_outputs = model(Tensor(occluded_inputs)).asnumpy()[np.arange(len(targets_repeat)), targets_repeat]
-        outputs_diff = original_outputs_repeat - occluded_outputs
-        outputs_diff = outputs_diff.reshape(inputs.shape[0], -1)
-
-        total_attribution += (
-            outputs_diff.reshape(outputs_diff.shape + (1,) * (len(masks.shape) - 2)) * masks).sum(axis=1).clip(1e-6)
-        weights += masks.sum(axis=1)
-
-        attribution = self._aggregation_fn(Tensor(total_attribution / weights))
+        count = 0
+        while count < num_perturbations:
+            ith_masks = masks[:, count:min(count+self._num_per_eval, num_perturbations)]
+            actual_num_eval = ith_masks.shape[1]
+            num_samples = batch_size * actual_num_eval
+            occluded_inputs = self._ablation(inputs_np, reference, ith_masks)
+            occluded_inputs = occluded_inputs.reshape((-1, *inputs_np.shape[1:]))
+            targets_repeat = np.repeat(targets_np, repeats=actual_num_eval, axis=0)
+            occluded_outputs = model(
+                ms.Tensor(occluded_inputs, ms.float32)).asnumpy()[np.arange(num_samples), targets_repeat]
+            original_outputs_repeat = np.repeat(original_outputs, repeats=actual_num_eval, axis=0)
+            outputs_diff = original_outputs_repeat - occluded_outputs
+            total_attribution += (
+                outputs_diff.reshape(ith_masks.shape[:2] + (1,) * (len(masks.shape) - 2)) * ith_masks).sum(axis=1)
+            weights += ith_masks.sum(axis=1)
+            count += actual_num_eval
+        attribution = self._aggregation_fn(ms.Tensor(total_attribution / weights, ms.float32))
         return attribution
+
+    def _get_window_size_and_strides(self, inputs):
+        """
+        Return window_size and strides.
+
+        If the spatial size of the input data is smaller than self._num_sample_per_dim, window_size and strides will
+        be set to `(C, 3, 3)` and `(C, 1, 1)` separately. Otherwise, the window_size and strides will be generated
+        adaptively to match self._num_sample_per_dim.
+        """
+        window_size = tuple(
+            [inputs.shape[1]]
+            + [x // self._num_sample_per_dim if x > self._num_sample_per_dim else 3 for x in inputs.shape[2:]])
+        strides = tuple(
+            [inputs.shape[1]]
+            + [x // self._num_sample_per_dim if x > self._num_sample_per_dim else 1 for x in inputs.shape[2:]])
+        return window_size, strides
 
     @staticmethod
     def _generate_masks(inputs, window_size, strides):
         """Generate masks to perturb contiguous regions."""
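Note: the while-loop above is a standard chunked-evaluation pattern: slice the masks, run at most `_num_per_eval` occluded copies per sample through the model, and accumulate the attribution. Isolated as a sketch, with the `forward` callable and shapes as assumptions rather than a MindSpore API:

import numpy as np

def evaluate_in_chunks(forward, occluded_inputs, targets, chunk):
    """Sketch of the chunking pattern used above.

    Evaluating `chunk` occluded copies at a time bounds peak memory instead
    of pushing every copy through the network at once.
    """
    outputs = []
    for start in range(0, occluded_inputs.shape[0], chunk):
        batch = occluded_inputs[start:start + chunk]
        labels = targets[start:start + chunk]
        scores = forward(batch)  # per-class scores for this chunk
        outputs.append(scores[np.arange(len(batch)), labels])
    return np.concatenate(outputs)

# Toy usage: ten occluded copies through a fake 4-class scorer, two at a time.
fake_forward = lambda x: np.tile([0.1, 0.2, 0.3, 0.4], (x.shape[0], 1))
occluded = np.zeros((10, 3, 8, 8), np.float32)
targets = np.zeros(10, np.int64)
print(evaluate_in_chunks(fake_forward, occluded, targets, chunk=2))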
@@ -72,3 +72,6 @@ class Attribution:
         if len(targets.shape) > 1 or (len(targets.shape) == 1 and len(targets) != len(inputs)):
             raise ValueError('Argument targets must be a 1D or 0D Tensor. If it is a 1D Tensor, '
                              'it should have the same length as inputs.')
+        elif inputs.shape[0] != 1:
+            raise ValueError('If targets is an int, the batch_size of inputs should equal 1. Received batch_size {}'
+                             .format(inputs.shape[0]))
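Note: the practical effect of the new check is that an integer target is only accepted for single-sample batches; batched calls must pass a 1D tensor with one label per sample. A hedged illustration, reusing `occlusion` as constructed in the example above:

import numpy as np
import mindspore as ms

single = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
batched = ms.Tensor(np.random.rand(4, 3, 224, 224), ms.float32)

# OK: an int target with batch_size == 1.
# saliency = occlusion(single, 5)
# OK: a 1D tensor with one label per sample.
# saliency = occlusion(batched, ms.Tensor([1, 0, 3, 2], ms.int32))
# Now raises ValueError up front instead of failing later:
# saliency = occlusion(batched, 5)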