forked from mindspore-Ecosystem/mindspore
!9206 fix memory exceed bug in Robustness and ValueError in Occlusion.
From: @yuhanshi
Commit: ecc9f00c3c
@@ -22,15 +22,14 @@ from ..._utils import calc_correlation
 
 
 class ClassSensitivity(LabelAgnosticMetric):
-    r"""
+    """
     Class sensitivity metric used to evaluate attribution-based explanations.
 
     Reasonable atrribution-based explainers are expected to generate distinct saliency maps for different labels,
-    especially for labels of highest confidence and low confidence. Class sensitivity evaluates the explainer through
+    especially for labels of highest confidence and low confidence. ClassSensitivity evaluates the explainer through
     computing the correlation between saliency maps of highest-confidence and lowest-confidence labels. Explainer with
     better class sensitivity will receive lower correlation score. To make the evaluation results intuitive, the
     returned score will take negative on correlation and normalize.
     """
 
     def evaluate(self, explainer, inputs):
@@ -46,12 +45,18 @@ class ClassSensitivity(LabelAgnosticMetric):
 
         Examples:
             >>> import mindspore as ms
             >>> from mindspore.explainer.benchmark import ClassSensitivity
             >>> from mindspore.explainer.explanation import Gradient
-            >>> model = resnet(10)
-            >>> gradient = Gradient(model)
-            >>> x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
+            >>> from mindspore.train.serialization import load_checkpoint, load_param_into_net
+            >>> # prepare your network and load the trained checkpoint file, e.g., resnet50.
+            >>> network = resnet50(10)
+            >>> param_dict = load_checkpoint("resnet50.ckpt")
+            >>> load_param_into_net(network, param_dict)
+            >>> # prepare your explainer to be evaluated, e.g., Gradient.
+            >>> gradient = Gradient(network)
+            >>> input_x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
             >>> class_sensitivity = ClassSensitivity()
-            >>> res = class_sensitivity.evaluate(gradient, x)
+            >>> res = class_sensitivity.evaluate(gradient, input_x)
         """
         self._check_evaluate_param(explainer, inputs)
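Note: the metric described in the docstring above is easy to restate outside the framework. A minimal NumPy sketch of the idea, assuming a `(1 - r) / 2` normalization (the helper name and that exact normalization are illustrative assumptions, not the internals of `calc_correlation`):

import numpy as np

def class_sensitivity_score(saliency_max, saliency_min):
    """Sketch: normalized negative correlation between two saliency maps.

    saliency_max / saliency_min: saliency for the highest- and lowest-
    confidence labels of one sample. Returns a score in [0, 1]; a higher
    score means the explainer distinguishes the two labels better.
    """
    a = saliency_max.ravel().astype(np.float64)
    b = saliency_min.ravel().astype(np.float64)
    r = np.corrcoef(a, b)[0, 1]  # Pearson correlation, in [-1, 1]
    return (1.0 - r) / 2.0       # lower correlation -> higher score

# Two unrelated random maps should land near 0.5.
rng = np.random.default_rng(0)
print(class_sensitivity_score(rng.random((224, 224)), rng.random((224, 224))))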
@@ -32,6 +32,7 @@ class Robustness(LabelSensitiveMetric):
         num_labels (int): Number of classes in the dataset.
 
     Examples:
+        >>> # Initialize a Robustness benchmarker passing num_labels of the dataset.
        >>> from mindspore.explainer.benchmark import Robustness
         >>> num_labels = 100
         >>> robustness = Robustness(num_labels)
@@ -41,7 +42,7 @@ class Robustness(LabelSensitiveMetric):
         super().__init__(num_labels)
 
         self._perturb = RandomPerturb()
-        self._num_perturbations = 100  # number of perturbations used in evaluation
+        self._num_perturbations = 10  # number of perturbations used in evaluation
         self._threshold = 0.1  # threshold to generate perturbation
         self._activation_fn = activation_fn
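Note: the drop from 100 to 10 perturbations is the heart of the memory fix. The removed code in the `evaluate` hunk below stacked every perturbed copy of the whole batch before a single explainer call; the new code explains one round of perturbations at a time. A back-of-the-envelope under assumed input shapes:

# Rough peak-memory estimate for the removed stack-everything strategy,
# under assumed shapes (batch of 8 RGB 224x224 float32 inputs).
batch_size, channels, height, width = 8, 3, 224, 224
bytes_per_float32 = 4

def perturbation_buffer_mb(num_perturbations):
    count = batch_size * num_perturbations * channels * height * width
    return count * bytes_per_float32 / 2 ** 20

print(perturbation_buffer_mb(100))  # ~459 MB of perturbed inputs in one explainer call
print(perturbation_buffer_mb(10))   # ~46 MB, and the new code explains one round at a time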
@@ -68,12 +69,17 @@ class Robustness(LabelSensitiveMetric):
             ValueError: If batch_size is larger than 1.
 
         Examples:
-            >>> # init an explainer, the network should contain the output activation function.
             >>> from mindspore.explainer.explanation import Gradient
             >>> from mindspore.explainer.benchmark import Robustness
+            >>> from mindspore.train.serialization import load_checkpoint, load_param_into_net
+            >>> # prepare your network and load the trained checkpoint file, e.g., resnet50.
+            >>> network = resnet50(10)
+            >>> param_dict = load_checkpoint("resnet50.ckpt")
+            >>> load_param_into_net(network, param_dict)
+            >>> # prepare your explainer to be evaluated, e.g., Gradient.
             >>> gradient = Gradient(network)
             >>> input_x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
-            >>> target_label = 5
+            >>> target_label = ms.Tensor([0], ms.int32)
             >>> robustness = Robustness(num_labels=10)
             >>> res = robustness.evaluate(gradient, input_x, target_label)
         """
@@ -84,39 +90,48 @@ class Robustness(LabelSensitiveMetric):
 
         inputs_np = inputs.asnumpy()
         if isinstance(targets, int):
-            targets = ms.Tensor(targets, ms.int32)
+            targets = ms.Tensor([targets], ms.int32)
         if saliency is None:
             saliency = explainer(inputs, targets)
         saliency_np = saliency.asnumpy()
 
         norm = np.sqrt(np.sum(np.square(saliency_np), axis=tuple(range(1, len(saliency_np.shape)))))
-        if norm == 0:
+        if (norm == 0).any():
             log.warning('Get saliency norm equals 0, robustness return NaN for zero-norm saliency currently.')
-            return np.array([np.nan])
+            norm[norm == 0] = np.nan
 
-        perturbations = []
-        for sample in inputs_np:
-            sample = np.expand_dims(sample, axis=0)
-            perturbations_per_input = []
-            for _ in range(self._num_perturbations):
-                perturbation = self._perturb(sample)
-                perturbations_per_input.append(perturbation)
-            perturbations_per_input = np.vstack(perturbations_per_input)
-            perturbations.append(perturbations_per_input)
-        perturbations = np.stack(perturbations, axis=0)
-
-        perturbations = np.reshape(perturbations, (-1,) + inputs_np.shape[1:])
-        perturbations = ms.Tensor(perturbations, ms.float32)
-
-        repeated_targets = np.repeat(targets.asnumpy(), repeats=self._num_perturbations, axis=0)
-        repeated_targets = ms.Tensor(repeated_targets, ms.int32)
-        saliency_of_perturbations = explainer(perturbations, repeated_targets)
-        perturbations_saliency = saliency_of_perturbations.asnumpy()
-
-        repeated_saliency = np.repeat(saliency_np, repeats=self._num_perturbations, axis=0)
-
-        sensitivities = np.sum((repeated_saliency - perturbations_saliency) ** 2,
-                               axis=tuple(range(1, len(repeated_saliency.shape))))
-
-        max_sensitivity = np.max(sensitivities.reshape((norm.shape[0], -1)), axis=1) / norm
+        model = nn.SequentialCell([explainer.model, self._activation_fn])
+        original_outputs = model(inputs).asnumpy()
+        sensitivities = []
+        for _ in range(self._num_perturbations):
+            perturbations = []
+            for j, sample in enumerate(inputs_np):
+                perturbation_on_single_sample = self._perturb_with_threshold(model,
+                                                                             np.expand_dims(sample, axis=0),
+                                                                             original_outputs[j])
+                perturbations.append(perturbation_on_single_sample)
+            perturbations = np.vstack(perturbations)
+            perturbations_saliency = explainer(ms.Tensor(perturbations, ms.float32), targets).asnumpy()
+            sensitivity = np.sum((perturbations_saliency - saliency_np) ** 2,
+                                 axis=tuple(range(1, len(saliency_np.shape))))
+            sensitivities.append(sensitivity)
+        sensitivities = np.stack(sensitivities, axis=-1)
+        max_sensitivity = np.max(sensitivities, axis=1) / norm
         robustness_res = 1 / np.exp(max_sensitivity)
         return robustness_res
+
+    def _perturb_with_threshold(self, model: nn.Cell, sample: np.ndarray, original_output: np.ndarray) -> np.ndarray:
+        """
+        Generate the perturbation until the L2-distance between original_output and perturbation_output is lower than
+        the given self._threshold or until the attempt reaches the max_attempt_time.
+        """
+        # the maximum number of attempts to get a perturbation with perturb_error lower than self._threshold
+        max_attempt_time = 3
+        perturbation = None
+        for _ in range(max_attempt_time):
+            perturbation = self._perturb(sample)
+            perturbation_output = model(ms.Tensor(perturbation, ms.float32)).asnumpy()
+            perturb_error = np.linalg.norm(original_output - perturbation_output)
+            if perturb_error <= self._threshold:
+                return perturbation
+        return perturbation
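Note: per sample, the new loop boils down to exp(-max_i ||s(x) - s(x_i')||^2 / ||s(x)||) over the ten perturbed copies, where s(.) is the saliency map. A hedged NumPy restatement of that score, with shapes and the function boundary assumed for illustration only:

import numpy as np

def robustness_score(saliency, perturbed_saliencies):
    """Sketch of the per-sample score the new loop computes.

    saliency: saliency map of the original input; perturbed_saliencies:
    saliency maps recomputed on perturbed copies of the input.
    """
    norm = np.sqrt(np.sum(saliency ** 2))
    if norm == 0:
        return np.nan  # mirrors the zero-norm warning path above
    drifts = [np.sum((saliency - p) ** 2) for p in perturbed_saliencies]
    max_sensitivity = max(drifts) / norm
    return float(np.exp(-max_sensitivity))  # same as 1 / np.exp(...)

rng = np.random.default_rng(1)
s = rng.random((3, 224, 224))
perturbed = [s + 0.01 * rng.standard_normal(s.shape) for _ in range(10)]
print(robustness_score(s, perturbed))  # near 1.0: the explanation barely moved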
@@ -14,14 +14,11 @@
 # ============================================================================
 """Occlusion explainer."""
 
-import math
-
 import numpy as np
 from numpy.lib.stride_tricks import as_strided
 
 import mindspore as ms
 import mindspore.nn as nn
-from mindspore import Tensor
 from .ablation import Ablation
 from .perturbation import PerturbationAttribution
 from .replacement import Constant
@@ -62,8 +59,8 @@ class Occlusion(PerturbationAttribution):
         network (Cell): Specify the black-box model to be explained.
 
     Inputs:
-        inputs (Tensor): The input data to be explained, a 4D tensor of shape :math:`(N, C, H, W)`.
-        targets (Tensor, int): The label of interest. It should be a 1D or 0D tensor, or an integer.
+        - **inputs** (Tensor) - The input data to be explained, a 4D tensor of shape :math:`(N, C, H, W)`.
+        - **targets** (Tensor, int) - The label of interest. It should be a 1D or 0D tensor, or an integer.
           If it is a 1D tensor, its length should be the same as `inputs`.
 
     Outputs:
@@ -72,13 +69,15 @@ class Occlusion(PerturbationAttribution):
     Example:
         >>> from mindspore.explainer.explanation import Occlusion
+        >>> from mindspore.train.serialization import load_checkpoint, load_param_into_net
+        >>> # prepare your network and load the trained checkpoint file, e.g., resnet50.
+        >>> network = resnet50(10)
+        >>> param_dict = load_checkpoint("resnet50.ckpt")
+        >>> load_param_into_net(network, param_dict)
        >>> # initialize Occlusion explainer and pass the pretrained model
         >>> occlusion = Occlusion(network)
-        >>> x = Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
-        >>> label = 1
-        >>> saliency = occlusion(x, label)
+        >>> input_x = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
+        >>> label = ms.Tensor([1], ms.int32)
+        >>> saliency = occlusion(input_x, label)
     """
 
     def __init__(self, network, activation_fn=nn.Softmax()):
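Note: for readers new to the method, occlusion attribution itself is a sliding-window loop: replace a window with a baseline value and credit the covered pixels with the drop in the target score. A minimal single-image sketch, assuming `forward` maps a (1, C, H, W) float32 batch to per-class scores and with window/stride/baseline as illustrative stand-ins for the adaptive values the class derives (the mask-based, chunked implementation in the next hunk is the real one):

import numpy as np

def occlusion_sketch(forward, image, target, window=16, stride=8, base=0.0):
    """Minimal single-image sketch of occlusion attribution."""
    _, _, height, width = image.shape
    original = forward(image)[0, target]
    attribution = np.zeros_like(image)
    counts = np.zeros_like(image)
    for top in range(0, height - window + 1, stride):
        for left in range(0, width - window + 1, stride):
            occluded = image.copy()
            occluded[0, :, top:top + window, left:left + window] = base
            drop = original - forward(occluded)[0, target]
            attribution[0, :, top:top + window, left:left + window] += drop
            counts[0, :, top:top + window, left:left + window] += 1
    return attribution / np.maximum(counts, 1)

# Toy check with a fake 3-class scorer.
fake_forward = lambda x: np.array([[x.mean(), 0.0, 1.0 - x.mean()]])
image = np.random.rand(1, 3, 64, 64).astype(np.float32)
print(occlusion_sketch(fake_forward, image, target=0).shape)  # (1, 3, 64, 64)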
@@ -88,62 +87,63 @@ class Occlusion(PerturbationAttribution):
         self._aggregation_fn = abs_max
         self._get_replacement = Constant(base_value=0.0)
         self._num_sample_per_dim = 32  # specify the number of perturbations each dimension.
-        self._num_per_eval = 32  # number of perturbations each evaluation step.
+        self._num_per_eval = 2  # number of perturbations generated for each sample per evaluation step.
 
     def __call__(self, inputs, targets):
         """Call function for 'Occlusion'."""
         self._verify_data(inputs, targets)
 
-        inputs = inputs.asnumpy()
-        targets = targets.asnumpy() if isinstance(targets, Tensor) else np.array([targets] * inputs.shape[0], np.int)
+        inputs_np = inputs.asnumpy()
+        targets_np = targets.asnumpy() if isinstance(targets, ms.Tensor) else np.array([targets], np.int)
 
-        # If spatial size of input data is smaller than self._num_sample_per_dim, window_size and strides will set to
-        # `(C, 3, 3)` and `(C, 1, 1)` separately.
-        window_size = tuple(
-            [inputs.shape[1]]
-            + [x % self._num_sample_per_dim if x > self._num_sample_per_dim else 3 for x in inputs.shape[2:]])
-        strides = tuple(
-            [inputs.shape[1]]
-            + [x // self._num_sample_per_dim if x > self._num_sample_per_dim else 1 for x in inputs.shape[2:]])
+        batch_size = inputs_np.shape[0]
+        window_size, strides = self._get_window_size_and_strides(inputs_np)
 
         model = nn.SequentialCell([self._model, self._activation_fn])
 
-        original_outputs = model(Tensor(inputs, ms.float32)).asnumpy()[np.arange(len(targets)), targets]
+        original_outputs = model(ms.Tensor(inputs, ms.float32)).asnumpy()[np.arange(batch_size), targets_np]
 
-        total_attribution = np.zeros_like(inputs)
-        weights = np.ones_like(inputs)
-        masks = Occlusion._generate_masks(inputs, window_size, strides)
+        total_attribution = np.zeros_like(inputs_np)
+        weights = np.ones_like(inputs_np)
+        masks = Occlusion._generate_masks(inputs_np, window_size, strides)
         num_perturbations = masks.shape[1]
-        original_outputs_repeat = np.repeat(original_outputs, repeats=num_perturbations, axis=0)
+        reference = self._get_replacement(inputs_np)
 
-        reference = self._get_replacement(inputs)
-        occluded_inputs = self._ablation(inputs, reference, masks)
-        targets_repeat = np.repeat(targets, repeats=num_perturbations, axis=0)
-
-        occluded_inputs = occluded_inputs.reshape((-1, *inputs.shape[1:]))
-        if occluded_inputs.shape[0] > self._num_per_eval:
-            cal_time = math.ceil(occluded_inputs.shape[0] / self._num_per_eval)
-            occluded_outputs = []
-            for i in range(cal_time):
-                occluded_input = occluded_inputs[i*self._num_per_eval
-                                                 :min((i+1) * self._num_per_eval, occluded_inputs.shape[0])]
-                target = targets_repeat[i*self._num_per_eval
-                                        :min((i+1) * self._num_per_eval, occluded_inputs.shape[0])]
-                occluded_output = model(Tensor(occluded_input)).asnumpy()[np.arange(target.shape[0]), target]
-                occluded_outputs.append(occluded_output)
-            occluded_outputs = np.concatenate(occluded_outputs)
-        else:
-            occluded_outputs = model(Tensor(occluded_inputs)).asnumpy()[np.arange(len(targets_repeat)), targets_repeat]
-        outputs_diff = original_outputs_repeat - occluded_outputs
-        outputs_diff = outputs_diff.reshape(inputs.shape[0], -1)
-
-        total_attribution += (
-            outputs_diff.reshape(outputs_diff.shape + (1,) * (len(masks.shape) - 2)) * masks).sum(axis=1).clip(1e-6)
-        weights += masks.sum(axis=1)
-
-        attribution = self._aggregation_fn(Tensor(total_attribution / weights))
+        count = 0
+        while count < num_perturbations:
+            ith_masks = masks[:, count:min(count+self._num_per_eval, num_perturbations)]
+            actual_num_eval = ith_masks.shape[1]
+            num_samples = batch_size * actual_num_eval
+            occluded_inputs = self._ablation(inputs_np, reference, ith_masks)
+            occluded_inputs = occluded_inputs.reshape((-1, *inputs_np.shape[1:]))
+            targets_repeat = np.repeat(targets_np, repeats=actual_num_eval, axis=0)
+            occluded_outputs = model(
+                ms.Tensor(occluded_inputs, ms.float32)).asnumpy()[np.arange(num_samples), targets_repeat]
+            original_outputs_repeat = np.repeat(original_outputs, repeats=actual_num_eval, axis=0)
+            outputs_diff = original_outputs_repeat - occluded_outputs
+            total_attribution += (
+                outputs_diff.reshape(ith_masks.shape[:2] + (1,) * (len(masks.shape) - 2)) * ith_masks).sum(axis=1)
+            weights += ith_masks.sum(axis=1)
+            count += actual_num_eval
+        attribution = self._aggregation_fn(ms.Tensor(total_attribution / weights, ms.float32))
         return attribution
+
+    def _get_window_size_and_strides(self, inputs):
+        """
+        Return window_size and strides.
+
+        If the spatial size of the input data is smaller than self._num_sample_per_dim, window_size and strides will
+        be set to `(C, 3, 3)` and `(C, 1, 1)` separately. Otherwise, the window_size and strides will be generated
+        adaptively to match self._num_sample_per_dim.
+        """
+        window_size = tuple(
+            [inputs.shape[1]]
+            + [x // self._num_sample_per_dim if x > self._num_sample_per_dim else 3 for x in inputs.shape[2:]])
+        strides = tuple(
+            [inputs.shape[1]]
+            + [x // self._num_sample_per_dim if x > self._num_sample_per_dim else 1 for x in inputs.shape[2:]])
+        return window_size, strides
 
     @staticmethod
     def _generate_masks(inputs, window_size, strides):
         """Generate masks to perturb contiguous regions."""
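Note: the while-loop above is a standard chunked-evaluation pattern: slice the masks, run at most `_num_per_eval` occluded copies per sample through the model, and accumulate the attribution. Isolated as a sketch, with the `forward` callable and shapes as assumptions rather than a MindSpore API:

import numpy as np

def evaluate_in_chunks(forward, occluded_inputs, targets, chunk):
    """Sketch of the chunking pattern used above.

    Evaluating `chunk` occluded copies at a time bounds peak memory instead
    of pushing every copy through the network at once.
    """
    outputs = []
    for start in range(0, occluded_inputs.shape[0], chunk):
        batch = occluded_inputs[start:start + chunk]
        labels = targets[start:start + chunk]
        scores = forward(batch)  # per-class scores for this chunk
        outputs.append(scores[np.arange(len(batch)), labels])
    return np.concatenate(outputs)

# Toy usage: ten occluded copies through a fake 4-class scorer, two at a time.
fake_forward = lambda x: np.tile([0.1, 0.2, 0.3, 0.4], (x.shape[0], 1))
occluded = np.zeros((10, 3, 8, 8), np.float32)
targets = np.zeros(10, np.int64)
print(evaluate_in_chunks(fake_forward, occluded, targets, chunk=2))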
@@ -72,3 +72,6 @@ class Attribution:
         if len(targets.shape) > 1 or (len(targets.shape) == 1 and len(targets) != len(inputs)):
             raise ValueError('Argument targets must be a 1D or 0D Tensor. If it is a 1D Tensor, '
                              'it should have the same length as inputs.')
+        elif inputs.shape[0] != 1:
+            raise ValueError('If targets is an int, the batch_size of inputs should equal 1. Received batch_size {}'
+                             .format(inputs.shape[0]))
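Note: the practical effect of the new check is that an integer target is only accepted for single-sample batches; batched calls must pass a 1D tensor with one label per sample. A hedged illustration, reusing `occlusion` as constructed in the example above:

import numpy as np
import mindspore as ms

single = ms.Tensor(np.random.rand(1, 3, 224, 224), ms.float32)
batched = ms.Tensor(np.random.rand(4, 3, 224, 224), ms.float32)

# OK: an int target with batch_size == 1.
# saliency = occlusion(single, 5)
# OK: a 1D tensor with one label per sample.
# saliency = occlusion(batched, ms.Tensor([1, 0, 3, 2], ms.int32))
# Now raises ValueError up front instead of failing later:
# saliency = occlusion(batched, 5)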