"""Transformations to images.
The images are assumed to be a :py:class:`torch.Tensor` or a
:py:class:`PIL.Image.Image`.
Use :py:class:`torchvision.transforms.ToTensor` to transform
:py:class:`PIL.Image.Image` instances appropriately.
"""
# Copyright (c) 2022 Continental Automotive GmbH
import abc
from typing import Tuple, Callable, Dict, Any, Optional, Union, Sequence, \
Mapping, List
import PIL.Image
import numpy as np
import torch
import torch.nn.functional
import torchvision as tv
import torchvision.transforms.functional
from .common import settings_to_repr, Transform
from .encoder import BatchConvOp, BatchIntersectEncode2D, BatchIoUEncode2D, \
BatchIntersectDecode2D, BatchBoxBloat
def pad_to_ratio(img_t: torch.Tensor, ratio: float = 1.,
pad_value: float = 0,
) -> Tuple[torch.Tensor, Tuple[int, int, int, int]]:
"""Pad image with constant ``pad_value`` to obtain given
image size ``ratio``.
:param img_t: 2D pytorch tensor
:param ratio: the desired ratio ``(width / height)``
:param pad_value: constant value to use for padding area
:return: tensor representing padded 2D image (batch)
"""
if len(img_t.shape) == 3:
_, height, width = img_t.shape
elif len(img_t.shape) == 2:
height, width = img_t.shape
else:
raise ValueError("Wrong image shape ({}); expected 2 or 3 dimensions"
.format(img_t.shape))
# Add padding to image
pad = padding_for_ratio((height, width), ratio)
img_t = torch.nn.functional.pad(img_t, list(pad), value=pad_value)
return img_t, pad
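# Illustrative usage sketch (not part of the original module): padding a
# landscape CHW tensor to a square ratio of 1.0 adds 50 rows on top and bottom.
#     >>> img = torch.zeros(3, 100, 200)
#     >>> padded, pad = pad_to_ratio(img, ratio=1.)
#     >>> padded.shape, pad
#     (torch.Size([3, 200, 200]), (0, 0, 50, 50))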
def padding_for_ratio(from_size: Tuple[int, int], to_ratio: float
) -> Tuple[int, int, int, int]:
"""Return the int padding for an image of size ``(height, width)`` to get
a ``(width / height)`` ratio of ``to_ratio``.
The output can be used as the ``pad`` argument of
:py:func:`torch.nn.functional.pad`.
:param from_size: original size as ``(height, width)``
:param to_ratio: the desired ``(width / height)`` ratio
:return: padding as ``(left, right, top, bottom)``"""
height, width = from_size
dim_diff_w: int = max(0, int((height * to_ratio) - width))
dim_diff_h: int = max(0, int((width / to_ratio) - height))
# (upper / left) padding and (lower / right) padding
pad_h_l: int = dim_diff_h // 2
pad_h_r: int = dim_diff_h - pad_h_l
pad_w_l: int = dim_diff_w // 2
pad_w_r: int = dim_diff_w - pad_w_l
# padding put together:
return pad_w_l, pad_w_r, pad_h_l, pad_h_r
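# Worked example (illustrative): to bring a 100x150 (height x width) image to a
# width/height ratio of 2.0, only the width needs padding:
# dim_diff_w = 100 * 2 - 150 = 50, split into 25 left and 25 right.
#     >>> padding_for_ratio((100, 150), to_ratio=2.)
#     (25, 25, 0, 0)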
def resize(tens: torch.Tensor, size: Tuple[int, int], mode: str = "bilinear"
) -> torch.Tensor:
"""Resize the given tensor assuming it to be a 2D image or batch thereof.
This is a wrapper around :py:func:`torch.nn.functional.interpolate` which
takes care of automatic unsqueezing and squeezing of batch and channel
dimensions assuming 2D images.
:param tens: the tensor holding a 2D image or batch thereof; dimensions are
interpreted as ``([[batch,] channel,] height, width)``
:param size: the new 2D size
:param mode: the interpolation mode; one of the options for
:py:func:`torch.nn.functional.interpolate`
:return: tensor representing resized 2D image (batch)
"""
if tens.dim() < 2:
raise ValueError(("Given tensor only is {}D, but was expected to be "
">= 2D (height, width)!").format(tens.dim()))
if tens.dim() > 4:
raise ValueError(("Given tensor is {}D, but was expected to be <= 4D "
"(batch, channel, height, width)").format(tens.dim()))
# Take care of unsqueezing batch and channel dimension:
unsqueeze_dims: int = max(0, 4 - tens.dim())
for _ in range(unsqueeze_dims):
tens = tens.unsqueeze(0)
# actual resizing:
align_setting = dict(align_corners=False) \
if 'linear' in mode or 'cubic' in mode else {}
interp_x = torch.nn.functional.interpolate(
tens, size=size, mode=mode, **align_setting)
# Now squeeze unsqueezed dimensions again:
for _ in range(unsqueeze_dims):
interp_x = interp_x.squeeze(0)
return interp_x
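# Usage sketch (illustrative, not from the original module): ``resize`` handles
# plain 2D masks as well as CHW images by temporarily adding batch/channel dims.
#     >>> resize(torch.rand(3, 64, 48), size=(32, 24)).shape
#     torch.Size([3, 32, 24])
#     >>> resize(torch.rand(10, 10), size=(5, 5), mode="nearest").shape
#     torch.Size([5, 5])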
class RecursiveLambda(ImageTransform):
"""Generic lambda transformation that applies the given function
with the standard :py:class:`ImageTransform` recursion.
The same caveats hold as for
:py:class:`~hybrid_learning.datasets.transforms.common.Lambda`.
"""
@property
def settings(self) -> Dict[str, Any]:
"""Settings to reproduce the instance."""
return dict(fun=self.fun)
def __repr__(self) -> str:
return settings_to_repr(self, dict(
fun=(self.fun.__name__
if hasattr(self.fun, "__name__") else repr(self.fun))))
def __init__(self, fun: Callable[[torch.Tensor], torch.Tensor]):
"""Init.
:param fun: the function to apply on call
"""
self.fun: Callable[[torch.Tensor], torch.Tensor] = fun
"""The function to apply on call."""
def apply_to(self, img: torch.Tensor) -> torch.Tensor:
"""Application of the lambda."""
return self.fun(img)
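# Usage sketch (illustrative): wrapping a plain function; ``apply_to`` simply
# forwards to the wrapped callable.
#     >>> doubler = RecursiveLambda(lambda t: t * 2)
#     >>> doubler.apply_to(torch.ones(2, 2)).sum().item()
#     8.0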
class Resize(ImageTransform):
"""Simple resize.
Internally, :py:func:`resize` is used.
.. note::
Depending on the mode, the used interpolation can cause
overshooting values larger/smaller than the previous
minimum/maximum value.
Ensure to catch such behavior if necessary by using
:py:func:`torch.clamp`.
"""
@property
def settings(self):
"""Settings to reproduce the instance."""
return dict(img_size=self.img_size, interpolation=self.interpolation)
def __init__(self, img_size: Tuple[int, int],
interpolation: str = "bilinear", force_type: bool = False):
"""Init.
:param img_size: see :py:attr:`img_size`
:param interpolation: see :py:attr:`interpolation`
:param force_type: see :py:attr:`force_type`
"""
self.img_size: Tuple[int, int] = img_size
"""Image target size as ``(height, width)``."""
self.interpolation: str = interpolation
"""Interpolation mode to use for the resizing.
See :py:func:`resize`."""
self.force_type: bool = force_type
"""Whether to raise in case the input is no tensor or
to silently skip the transformation.
If set to ``False``, one can silently skip floats and other types."""
def apply_to(self, img: torch.Tensor) -> torch.Tensor:
"""Resize ``img`` to the configured image size.
See also :py:attr:`img_size`."""
if not isinstance(img, torch.Tensor):
if self.force_type:
raise TypeError("Can only resize images encoded as torch.Tensor but got img of type {}"
.format(type(img)))
return img
img = resize(img, size=self.img_size, mode=self.interpolation)
return img
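# Usage sketch (illustrative): non-tensor inputs pass through unchanged
# unless force_type is set.
#     >>> resizer = Resize(img_size=(16, 16))
#     >>> resizer.apply_to(torch.rand(1, 32, 32)).shape
#     torch.Size([1, 16, 16])
#     >>> resizer.apply_to("no tensor")
#     'no tensor'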
class PadAndResize(Resize):
"""Transformation that pads an image to a given ratio and then
resizes it to a fixed size.
The padding value is black (constant 0).
This is especially suitable when going from larger image dimensions to
smaller ones.
For the other way round, consider first scaling, then padding.
For further details see the super class.
"""
def apply_to(self, img: torch.Tensor) -> torch.Tensor:
"""Pad ``img`` to the target aspect ratio, then resize it to the
configured image size.
See also :py:attr:`~Resize.img_size`."""
img = pad_and_resize(img, img_size=self.img_size, interpolation=self.interpolation)
return img
def pad_and_resize(img: torch.Tensor, img_size: Tuple[int, int],
interpolation: str = "bilinear") -> torch.Tensor:
"""Pad an image to the aspect ratio of ``img_size``, then resize it.
For details see :py:class:`PadAndResize`."""
img = pad_to_ratio(img, img_size[1] / img_size[0])[0]
img = resize(img, size=img_size, mode=interpolation)
return img
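# Usage sketch (illustrative): a 50x100 image is first padded to the 1:1 ratio
# of the 64x64 target size, then resized.
#     >>> pad_and_resize(torch.rand(3, 50, 100), img_size=(64, 64)).shape
#     torch.Size([3, 64, 64])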
class Threshold(ImageTransform):
"""Threshold tensors and set new values below and/or above the threshold.
The operation is:
.. code-block:: python
    x = val_low_class if x <= threshold else val_high_class
Each of the values :py:attr:`val_low_class` and
:py:attr:`val_high_class` can also be set to ``None``,
in which case ``x`` is used instead.
Set both values to obtain a binarizing operation.
.. note::
:py:attr:`val_low_class` need *not* be lower than
:py:attr:`val_high_class`, so one can also invert binary masks with
this transformation.
"""
def __init__(self, threshold: Union[float, torch.Tensor] = 0.5,
val_low_class: Optional[Union[float, torch.Tensor]] = 0.,
val_high_class: Optional[Union[float, torch.Tensor]] = 1.):
"""Init.
:param threshold: the threshold that defines the border between low
and high class
:param val_high_class: the value to which to set entries from high class
:param val_low_class: the value to which to set entries from low class
"""
super().__init__()
self.threshold: Union[float, torch.Tensor] = threshold
"""Threshold by which to decide the class;
low class if ``x <= threshold``, else high class."""
self.val_low_class: Optional[Union[float, torch.Tensor]] = \
val_low_class
"""Value to set the low class to.
If set to ``None``, the input value is used."""
self.val_high_class: Optional[Union[float, torch.Tensor]] = \
val_high_class
"""Value to set the high class to.
If set to ``None``, the input value is used."""
@property
def settings(self) -> Dict[str, Any]:
"""Settings to reproduce instance."""
settings = dict(threshold=self.threshold)
if self.val_low_class != 0.:
settings['val_low_class'] = self.val_low_class
if self.val_high_class != 1.:
settings['val_high_class'] = self.val_high_class
return settings
def apply_to(self, input_tensor: torch.Tensor) -> torch.Tensor:
"""Binarize ``input_tensor`` according to the settings.
In case any of this instance's settings are tensors, these are
moved to the same device as ``input_tensor``."""
# region Value checks and default values
if not isinstance(input_tensor, torch.Tensor):
raise ValueError(("input_tensor must be of type torch.Tensor, but "
"was {}").format(type(input_tensor)))
val_low_class: Union[torch.Tensor, float] = self.val_low_class \
if self.val_low_class is not None else input_tensor
val_high_class: Union[torch.Tensor, float] = self.val_high_class \
if self.val_high_class is not None else input_tensor
# ensure all tensors are on the same device:
if isinstance(self.threshold, torch.Tensor):
self.threshold = self.threshold.to(input_tensor.device)
if isinstance(val_high_class, torch.Tensor):
val_high_class = val_high_class.to(input_tensor.device)
if isinstance(val_low_class, torch.Tensor):
val_low_class = val_low_class.to(input_tensor.device)
# endregion
return torch.where((input_tensor.float() > self.threshold),
torch.as_tensor(val_high_class, dtype=torch.float),
torch.as_tensor(val_low_class, dtype=torch.float))
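# Usage sketch (illustrative): standard binarization, and mask inversion by
# swapping the low/high class values.
#     >>> Threshold(threshold=0.5).apply_to(torch.tensor([0.2, 0.5, 0.9]))
#     tensor([0., 0., 1.])
#     >>> Threshold(0.5, val_low_class=1., val_high_class=0.).apply_to(torch.tensor([0., 1.]))
#     tensor([1., 0.])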
class Binarize(Threshold):
"""Simple class for binarizing tensors into high and low class values.
This is an alias for :py:class:`Threshold`. See there for details."""
class BinarizeByQuantile(ImageTransform):
"""Set the given number of highest-valued pixels (respectively the pixels
above the q-th quantile) in an image to 1 and all others to 0.
Mind for RGB images: a pixel here means a single value in one channel."""
def __init__(self, quantile: Optional[float] = None, num_pixels: Optional[int] = None):
"""Init.
:param quantile: quantile of pixels to set to 1, rest is set to 0;
overridden by ``num_pixels``
:param num_pixels: number of pixels with highest value to set to one,
rest is set to 0
"""
if num_pixels is None and quantile is None:
raise ValueError("either num_pixels or quantile must be given")
self.num_pixels: Optional[int] = num_pixels
"""Number of pixels with highest values to set to one."""
self.quantile: Optional[float] = quantile
"""Quantile of pixels to set to one, rest is set to 0;
overridden by :py:attr:`num_pixels`."""
@property
def settings(self) -> Dict[str, Any]:
"""Settings to reproduce the instance."""
if self.num_pixels is not None:
return dict(num_pixels=self.num_pixels)
return dict(quantile=self.quantile)
def apply_to(self, img: torch.Tensor) -> torch.Tensor:
"""Binarize ``img`` by setting a quantile or number of pixels to one,
the rest to 0.
See :py:attr:`quantile` respectively :py:attr:`num_pixels`.
:param img: target tensor to binarize
"""
img_np: np.ndarray = img.detach().cpu().numpy()
quantile: float = min(self.num_pixels / img_np.size, 1) \
if self.num_pixels is not None else self.quantile
thresh: float = np.quantile(img_np, 1 - quantile)
img = (img > thresh).float()
return img
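# Usage sketch (illustrative): keep only the two highest-valued entries.
#     >>> BinarizeByQuantile(num_pixels=2).apply_to(torch.tensor([0.1, 0.4, 0.3, 0.9]))
#     tensor([0., 1., 0., 1.])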
class AsBatch(BatchWiseImageTransform):
"""Ensure that the given transformation is fed with a batch of inputs.
:py:attr:`~BatchWiseImageTransform.batch_wise` determines whether
inputs are assumed to already be batches or not.
The output is the same as the input (batch or not)."""
def __init__(self, trafo: Callable[[torch.Tensor], torch.Tensor],
batch_wise: bool = False):
super().__init__(batch_wise=batch_wise)
self.trafo: Callable[[torch.Tensor], torch.Tensor] = trafo
"""The transformation that requires batch-wise input."""
def apply_to_batch(self, batch: torch.Tensor) -> torch.Tensor:
"""Feed the batch to trafo."""
return self.trafo(batch)
class ToFixedDims(ImageTransform):
"""Squeeze or unsqueeze a tensor to obtain specified number of
dimensions."""
def __init__(self, dims: int):
""":param dims: number of dimensions the output tensors shall have"""
assert dims > 0
self.dims: int = dims
"""The targeted number of dimensions."""
def apply_to(self, img: torch.Tensor) -> torch.Tensor:
"""Squeezing or unsqueezing."""
while len(img.size()) > self.dims:
if img.size()[0] > 1:
raise ValueError(("Cannot squeeze first dimension in tensor of "
"size {} towards {} dimensions.")
.format(img.size(), self.dims))
img = img.squeeze(0)
while len(img.size()) < self.dims:
img = img.unsqueeze(0)
return img
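# Usage sketch (illustrative): unsqueeze a CHW image to NCHW, or strip
# singleton batch/channel dimensions.
#     >>> ToFixedDims(4).apply_to(torch.rand(3, 8, 8)).shape
#     torch.Size([1, 3, 8, 8])
#     >>> ToFixedDims(2).apply_to(torch.rand(1, 1, 8, 8)).shape
#     torch.Size([8, 8])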
class WithThresh(BatchWiseImageTransform):
# pylint: disable=line-too-long
"""Wrap a batch transformation with binarizing (and unsqueezing) before
and after.
The transformation should accept a tensor holding a mask (respectively a
batch of masks if
:py:attr:`~hybrid_learning.datasets.transforms.image_transforms.BatchWiseImageTransform.batch_wise`
is ``True``) and return a transformed batch.
If given, ``pre_thresh`` is applied before, and
``post_thresh`` after the transformation.
The transformation is assumed to require a batch of masks, so if
:py:attr:`~hybrid_learning.datasets.transforms.image_transforms.BatchWiseImageTransform.batch_wise`
is ``False``, the missing batch dimension is handled.
Thus, this wrapper can also be used to turn a batch operation
into one on single masks.
"""
# pylint: enable=line-too-long
def __init__(self,
trafo: Callable[[torch.Tensor], torch.Tensor],
pre_thresh: Optional[float] = None,
post_thresh: Optional[float] = None,
batch_wise: bool = False,
pre_low_class: float = 0., pre_high_class: float = 1.,
post_low_class: float = 0., post_high_class: float = 1.,
):
# pylint: disable=line-too-long
"""Init.
:param trafo: the transformation instance to wrap
:param pre_thresh: if not ``None``, the tensors to be modified are
binarized to 0 and 1 values with threshold ``pre_thresh`` before
modification
:param post_thresh: if not ``None``, the tensors to be modified are
binarized to 0 and 1 values with threshold ``post_thresh`` after
modification
:param batch_wise: see
:py:attr:`~hybrid_learning.datasets.transforms.image_transforms.BatchWiseImageTransform.batch_wise`
:param pre_high_class: value to set items to that exceed ``pre_thresh``
:param pre_low_class: value to set items to that are below
``pre_thresh``
:param post_high_class: value to set items to that exceed
``post_thresh``
:param post_low_class: value to set items to that are below
``post_thresh``
"""
# pylint: enable=line-too-long
# Value checks:
if not callable(trafo):
raise ValueError("trafo is not callable, but of type {}"
.format(type(trafo)))
super().__init__(batch_wise=batch_wise)
self.trafo: Callable[[torch.Tensor], torch.Tensor] = trafo
"""Modifier (en- or decoder) module that is used for modifications."""
self.pre_thresholder: Optional[Binarize] = None \
if pre_thresh is None else \
Binarize(threshold=pre_thresh,
val_low_class=pre_low_class,
val_high_class=pre_high_class)
"""Binarizing transformation applied to targets before IoU encoding
if not ``None``."""
self.post_thresholder: Optional[Binarize] = None \
if post_thresh is None else \
Binarize(threshold=post_thresh,
val_low_class=post_low_class,
val_high_class=post_high_class)
"""Binarizing transformation applied to targets after IoU encoding if
not ``None``."""
@property
def settings(self) -> Dict[str, Any]:
"""Settings to reproduce instance."""
settings = super().settings
settings['trafo'] = self.trafo
if self.pre_thresholder is not None:
settings['pre_thresh'] = self.pre_thresholder.threshold
if self.pre_thresholder.val_low_class != 0.:
settings['pre_val_low'] = self.pre_thresholder.val_low_class
if self.pre_thresholder.val_high_class != 1.:
settings['pre_val_high'] = self.pre_thresholder.val_high_class
if self.post_thresholder is not None:
settings['post_thresh'] = self.post_thresholder.threshold
if self.post_thresholder.val_low_class != 0.:
settings['post_val_low'] = self.post_thresholder.val_low_class
if self.post_thresholder.val_high_class != 1.:
settings['post_val_high'] = self.post_thresholder.val_high_class
return settings
def apply_to_batch(self, masks: torch.Tensor) -> torch.Tensor:
"""Forward method in which to apply the trafo and thresholding.
Pre-threshold, modify, and post-threshold given mask(s).
The thresholding is applied, if the corresponding
:py:attr:`pre_thresholder` / :py:attr:`post_thresholder`
is not ``None``.
If :py:attr:`batch_wise` is ``False``, it is assumed a single mask
was given (no batch dimension).
:param masks: :py:class:`torch.Tensor` of shape
``([batch_size,] 1, width, height)`` holding masks for one batch.
:return: the modified and thresholded masks
"""
if self.pre_thresholder is not None:
masks = self.pre_thresholder(masks)
modified_masks: torch.Tensor = self.trafo(masks)
if self.post_thresholder is not None:
modified_masks = self.post_thresholder(modified_masks)
return modified_masks
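# Usage sketch (illustrative; the wrapped trafo is an arbitrary example):
# binarize masks, average-pool them, and binarize the smoothed result again.
#     >>> smoothing = torch.nn.AvgPool2d(kernel_size=3, stride=1, padding=1)
#     >>> trafo = WithThresh(smoothing, pre_thresh=0.5, post_thresh=0.4, batch_wise=True)
#     >>> trafo.apply_to_batch(torch.rand(2, 1, 8, 8)).shape
#     torch.Size([2, 1, 8, 8])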
class ToBBoxes(BatchWiseImageTransform):
"""Treat pixels of given mask as scores of constant-size bounding boxes,
and return a mask with the non-max-suppressed bounding boxes."""
def __init__(self,
bbox_size: Tuple[int, int],
iou_threshold: float = 0.5,
batch_wise: bool = False
):
"""Init.
:param bbox_size: see :py:attr:`bbox_size`
:param iou_threshold: see :py:attr:`iou_threshold`
:param batch_wise: see
:py:attr:`~hybrid_learning.datasets.transforms.image_transforms.BatchWiseImageTransform.batch_wise`
"""
super().__init__(batch_wise=batch_wise)
self.iou_threshold: float = iou_threshold
"""The threshold for the intersection over union
between two bounding boxes above which the lower-scored box is
pruned.
See also :py:func:`torchvision.ops.nms`."""
self.bloater: BatchBoxBloat = BatchBoxBloat(kernel_size=bbox_size)
"""The bloating operation used to create a mask with bounding boxes
from anchors and scores."""
@property
def bbox_size(self) -> Tuple[int, int]:
"""The constant size to be assumed for all bounding boxes
in pixels. Give as ``(height, width)``."""
return self.bloater.kernel_size
@property
def settings(self) -> Dict[str, Any]:
"""Settings to reproduce the instance."""
iou_info = dict(iou_threshold=self.iou_threshold) \
if self.iou_threshold != 0.5 else {}
return dict(bbox_size=self.bbox_size, **iou_info, **super().settings)
def apply_to_batch(self, score_masks: torch.Tensor) -> torch.Tensor:
"""Bloat the ``score_masks`` to a mask of non-max-suppressed bounding
boxes.
Each pixel in ``score_masks`` should represent the score of a bounding
box of fixed size anchored at this pixel.
The box size is derived from :py:attr:`bbox_size`.
``score_masks`` should be a mask of size ``(..., height, width)``.
For non-max-suppression of the bounding boxes,
:py:func:`torchvision.ops.nms` is used.
:return: a mask of the same size as ``score_masks`` with each anchor
in ``score_masks`` bloated to a bounding box filled with the score value;
for overlapping boxes, the higher scored one is up front
"""
# Some pylint issues with coordinate naming and torch.tensor:
# pylint: disable=not-callable
# pylint: disable=invalid-name
if len(score_masks.size()) < 3:
raise ValueError(
("Given batch of masks has size {} of dimension {} < 3"
).format(score_masks.size(), len(score_masks.size())))
if len(score_masks.size()) == 3:
score_masks = score_masks.unsqueeze(1) # add channel dimension
# Box dimensions and offsets:
bbox_h, bbox_w = self.bbox_size
# else (round(self.bbox_size[0] * score_masks.size()[-2]),
# round(self.bbox_size[1] * score_masks.size()[-1]))
top, left = bbox_h // 2, bbox_w // 2
bottom, right = bbox_h - top, bbox_w - left
# Prepare NMS input: Anchors to boxes
scores: torch.Tensor = score_masks.view((-1,))
_boxes: List[List[int]] = []
_batch_idxs: List[int] = []
# box in mask b of batch centered at (x, y) has index
# i = b * (mask_height * mask_width) + y * mask_width + x
for batch_idx in range(score_masks.size()[0]):
for y in range(score_masks.size()[-2]):
for x in range(score_masks.size()[-1]):
_boxes.append([x - left, y - top, x + right, y + bottom])
_batch_idxs.append(batch_idx)
boxes_t: torch.Tensor = torch.tensor(_boxes, dtype=score_masks.dtype)
batch_idxs_t: torch.Tensor = torch.tensor(_batch_idxs, dtype=torch.int)
# NMS: Collect idxs of boxes (resp. box centers) to keep
keep: torch.Tensor = tv.ops.batched_nms(
boxes=boxes_t, scores=scores, idxs=batch_idxs_t,
iou_threshold=self.iou_threshold)
# To determine the mask center corresponding to an entry in keep_idxs:
# keep_idx = batch_idx * (mask_height * mask_width) + y * mask_width + x
_keep_mask: np.ndarray = np.zeros(score_masks.size(), dtype=np.float32)
for keep_idx in keep:
batch_idx: int = keep_idx // (score_masks.size()[-1] *
score_masks.size()[-2])
assert batch_idx == batch_idxs_t[keep_idx]
# xy_idx = y * mask_width + x
xy_idx: int = keep_idx % (score_masks.size()[-1] *
score_masks.size()[-2])
y: int = xy_idx // score_masks.size()[-1]
x: int = xy_idx % score_masks.size()[-1]
_keep_mask[batch_idx, ..., y, x] = 1
keep_mask_t: torch.Tensor = torch.from_numpy(_keep_mask)
# Set all scores of boxes to abandon to 0:
nms_score_mask: torch.Tensor = score_masks * keep_mask_t
# Now bloat each box center to a box filled with its score:
return self.bloater(nms_score_mask)
# pylint: enable=not-callable
# pylint: enable=invalid-name
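# Usage sketch (illustrative): each non-zero pixel acts as the score of a
# 3x3 box centered at that pixel; the result is a same-sized mask of the
# non-max-suppressed boxes.
#     >>> to_boxes = ToBBoxes(bbox_size=(3, 3), batch_wise=True)
#     >>> scores = torch.zeros(1, 1, 8, 8)
#     >>> scores[0, 0, 4, 4] = 1.
#     >>> box_mask = to_boxes.apply_to_batch(scores)  # same size as scores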
class ToTensor(ImageTransform):
"""Turn objects into tensors or move tensors to given device or dtype.
The operation avoids copying of data if possible.
For details see :py:func:`torch.as_tensor`.
.. note::
The default return type for :py:class:`PIL.Image.Image` instances is
a tensor of dtype :py:class:`torch.float` with value range
in ``[0, 1]``.
"""
DTYPE_SIZES: Dict[torch.dtype, int] = {
torch.bool: 1,
torch.uint8: 8, torch.int8: 8,
torch.int16: 16, torch.float16: 16, torch.bfloat16: 16,
torch.int32: 32, torch.float32: 32, torch.complex32: 32,
torch.int64: 64, torch.float64: 64, torch.complex64: 64,
torch.complex128: 128,
}
def __init__(self, device: Optional[Union[str, torch.device]] = None,
dtype: Optional[torch.dtype] = None,
sparse: Optional[Union[bool, str]] = None,
requires_grad: Optional[bool] = None):
self.device: Optional[Union[str, torch.device]] = \
torch.device(device) if device is not None \
else ("cpu" if not torch.cuda.is_available() else None)
"""The device to move tensors to."""
self.dtype: torch.dtype = dtype
"""The dtype created tensors shall have."""
self.sparse: Optional[bool] = sparse
"""Whether the tensor should be sparse or dense or dynamically choose
the smaller one (option 'smallest').
No modification is made if set to ``None``."""
self.requires_grad: Optional[bool] = requires_grad
"""Whether the new tensor should require grad."""
@property
def settings(self) -> Dict[str, Any]:
"""Settings."""
setts = dict()
if self.device is not None:
setts.update(device=self.device)
if self.dtype is not None:
setts.update(dtype=self.dtype)
return setts
@staticmethod
def to_sparse(tens: torch.Tensor,
device: Optional[Union[torch.device, str]] = None,
dtype: Optional[torch.dtype] = None,
requires_grad: Optional[bool] = None
) -> torch.sparse.Tensor:
"""Convert dense tensor ``tens`` to sparse tensor.
Scalars are not sparsified but returned as normal tensors.
"""
if len(tens.size()) == 0:  # scalar case
return torch.as_tensor(tens, device=device, dtype=dtype)
indices = torch.nonzero(tens).t()
values = tens[tuple(indices)]
sparse_tens: torch.sparse.Tensor = torch.sparse_coo_tensor(
indices, values, size=tens.size(), device=device, dtype=dtype,
requires_grad=(requires_grad if requires_grad is not None
else tens.requires_grad)
)
return sparse_tens
@classmethod
def is_sparse_smaller(cls, tens):
r"""Given a tensor, return whether its sparse representation occupies
less storage.
Given the size formulas
.. math::
\text{sparse size:}\quad
\text{nnz} \cdot (d \cdot s_{ind} + s_{val}) \\
\text{dense size:}\quad
\text{numel} \cdot s_{val}
with the sizes in bit of one index resp. value entry
:math:`s_{ind}, s_{val}`, the dimension of the tensor :math:`d`,
and the proportion of non-zero elements
:math:`p = \text{nnz} / \text{numel}`,
the sparse representation is smaller exactly if
.. math::
p < \frac {s_{val}} {d \cdot s_{ind} + s_{val}}
"""
bitsize_index: int = 64
bitsize_value: int = cls.DTYPE_SIZES[tens.dtype]
return ((tens.count_nonzero() / tens.numel()) <
(bitsize_value / (tens.dim() * bitsize_index + bitsize_value)))
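# Worked numbers (illustrative): for a 2D float32 tensor, s_val = 32 and
# s_ind = 64, so the sparse COO form only pays off when less than
# 32 / (2 * 64 + 32) = 0.2 of the entries are non-zero.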
def apply_to(self, tens: Union[torch.Tensor, np.ndarray, PIL.Image.Image]
) -> Union[torch.Tensor, torch.sparse.Tensor]:
"""Create tensor from ``tens`` with configured device and dtype.
See :py:attr:`device` and :py:attr:`dtype`."""
return self.to_tens(tens,
device=self.device, dtype=self.dtype,
sparse=self.sparse,
requires_grad=self.requires_grad)
@classmethod
def to_tens(cls, tens: Union[torch.Tensor, np.ndarray, PIL.Image.Image],
device: Union[str, torch.device] = None,
dtype: Optional[torch.dtype] = None,
sparse: Optional[Union[bool, str]] = None,
requires_grad: Optional[bool] = None):
"""See ``apply_to`` and ``__init__``."""
if isinstance(tens, PIL.Image.Image):
tens: torch.Tensor = \
torchvision.transforms.functional.to_tensor(tens)
# to correct device and dtype
tens: torch.Tensor = \
torch.as_tensor(tens, device=device, dtype=dtype)
# possibly sparsify
if sparse and (not tens.is_sparse) and \
(sparse != 'smallest' or cls.is_sparse_smaller(tens)):
tens: torch.sparse.Tensor = cls.to_sparse(
tens, device=device, dtype=dtype,
requires_grad=requires_grad)
# explicitly densify
if not sparse and sparse is not None and tens.is_sparse:
# bfloat16 cannot be densified in older torch versions:
if tens.dtype == torch.bfloat16:
tens = tens.to(torch.float)
tens: torch.Tensor = tens.to_dense()
if requires_grad is not None:
if not requires_grad and tens.requires_grad and not tens.is_leaf:
tens = tens.detach()
else:
tens = tens.requires_grad_(requires_grad)
return tens
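# Usage sketch (illustrative): numpy arrays and PIL images become tensors of
# the requested dtype; PIL images additionally get scaled to [0, 1].
#     >>> ToTensor(dtype=torch.float32).apply_to(np.array([1, 2, 3])).dtype
#     torch.float32
#     >>> ToTensor.to_tens(PIL.Image.new("RGB", (4, 2))).shape
#     torch.Size([3, 2, 4])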
class NoGrad(ImageTransform):
"""Disable ``requires_grad`` for the given tensors."""
def apply_to(self, tens: torch.Tensor) -> torch.Tensor:
"""Disable gradient tracking for ``tens``: tensors are detached,
modules get ``requires_grad_(False)`` set."""
if isinstance(tens, torch.Tensor):
return tens.detach()
if isinstance(tens, torch.nn.Module):
# noinspection PyTypeChecker
return tens.requires_grad_(False)
return tens
class ToActMap(ImageTransform):
"""Evaluate a given image by a torch model on the correct device.
The model should return tensors, e.g. be a
:py:class:`~hybrid_learning.concepts.models.model_extension.ModelStump`.
If :py:attr:`device` is given, the parameters of the model
:py:attr:`act_map_gen` are moved to this device.
.. warning::
Ensure moving of the model parameters to a different device
does not interfere with e.g. optimization of these parameters
in case :py:attr:`device` is given!
"""
@property
def settings(self) -> Dict[str, Any]:
"""Settings to reproduce the instance."""
return dict(act_map_gen=self.act_map_gen,
device=self.device)
def __init__(self, act_map_gen: torch.nn.Module,
device: Optional[Union[str, torch.device]] = None,
requires_grad: bool = False):
"""Init.
:param act_map_gen: model the output of which is interpreted as
activation maps
:param device: the device to operate the transformation on
:param requires_grad: whether the model and the transformation output
should require gradients
(this trafo may be unpicklable in combination with CUDA usage if
set to ``True``)
"""
self.requires_grad: bool = requires_grad
"""Whether to turn gradient tracking on for the transformation
calculation."""
self.act_map_gen: torch.nn.Module = \
act_map_gen.eval().requires_grad_(requires_grad)
"""Callable torch model that accepts and returns a
:py:class:`torch.Tensor`."""
self.device: Optional[Union[str, torch.device]] = torch.device(device) \
if isinstance(device, str) else device
"""If given, the device to move model and image to before evaluation."""
def apply_to(self, img_t: torch.Tensor) -> torch.Tensor:
"""Collect output of activation map generator for image ``img_t`` as
input.
The evaluation of :py:attr:`act_map_gen` on ``img_t`` is conducted
on :py:attr:`device` if this is set.
:param img_t: image for which to obtain activation map;
make sure all necessary transformations are applied
:return: activation map as :py:class:`torch.Tensor`
"""
# Run wrapper to obtain intermediate outputs
with torch.set_grad_enabled(self.requires_grad):
if self.device is not None:
self.act_map_gen = self.act_map_gen.to(self.device)
img_t = img_t.to(self.device)
# move input to correct device
elif len(list(self.act_map_gen.parameters())) > 0:
img_t = img_t.to(next(self.act_map_gen.parameters()).device)
act_map = self.act_map_gen.eval()(img_t.unsqueeze(0))
# Squeeze batch dimension
act_map = act_map.squeeze(0)
return act_map
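# Usage sketch (illustrative; the model here is just a stand-in stump):
# the single image is batched, fed through the model, and un-batched again.
#     >>> stump = torch.nn.Conv2d(3, 8, kernel_size=3, padding=1)
#     >>> to_act = ToActMap(stump, device="cpu")
#     >>> to_act.apply_to(torch.rand(3, 16, 16)).shape
#     torch.Size([8, 16, 16])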
class ConvOpWrapper(WithThresh):
"""Base wrapper class to turn convolutional batch operations into single
mask operations.
Wraps classes inheriting from
:py:class:`~hybrid_learning.datasets.transforms.encoder.BatchConvOp`."""
def __init__(self, trafo: BatchConvOp, **kwargs):
super().__init__(trafo=trafo, **kwargs)
self.trafo: BatchConvOp = self.trafo
@property
def proto_shape(self) -> np.ndarray:
"""Wrap the
:py:class:`~hybrid_learning.datasets.transforms.encoder.BatchConvOp.proto_shape`."""
return self.trafo.proto_shape
@property
def kernel_size(self) -> Tuple[int, ...]:
"""Wrap the
:py:class:`~hybrid_learning.datasets.transforms.encoder.BatchConvOp.kernel_size`."""
return self.trafo.kernel_size
@property
def settings(self) -> Dict[str, Any]:
"""Settings; essentially merged from wrapped encoder and super."""
return dict(**self.trafo.settings, **super().settings)
class IntersectEncode(ConvOpWrapper):
"""Intersection encode a single mask.
This is a wrapper around
:py:class:`~hybrid_learning.datasets.transforms.encoder.BatchIntersectEncode2D`.
"""
def __init__(self, kernel_size: Tuple[int, int] = None, *,
normalize_by: str = 'proto_shape',
proto_shape: np.ndarray = None,
**thresh_args):
# pylint: disable=line-too-long
"""Init.
:param thresh_args: thresholding arguments;
see :py:class:`~hybrid_learning.datasets.transforms.image_transforms.WithThresh`
"""
# pylint: enable=line-too-long
super().__init__(
trafo=BatchIntersectEncode2D(kernel_size=kernel_size,
proto_shape=proto_shape,
normalize_by=normalize_by),
**thresh_args)
class IoUEncode(ConvOpWrapper):
"""IoU encode a single mask.
This is a wrapper around
:py:class:`~hybrid_learning.datasets.transforms.encoder.BatchIoUEncode2D`.
"""
def __init__(self,
kernel_size: Tuple[int, int], *,
proto_shape: np.ndarray = None,
smooth: float = None,
**thresh_args):
# pylint: disable=line-too-long
"""Init.
:param thresh_args: thresholding arguments;
see :py:class:`~hybrid_learning.datasets.transforms.image_transforms.WithThresh`
"""
# pylint: enable=line-too-long
super().__init__(
trafo=BatchIoUEncode2D(kernel_size=kernel_size,
proto_shape=proto_shape,
**(dict(smooth=smooth) if smooth is not None
else {})),
**thresh_args)
class IntersectDecode(ConvOpWrapper):
"""Intersection decode a single mask.
This is a wrapper around
:py:class:`~hybrid_learning.datasets.transforms.encoder.BatchIntersectDecode2D`.
"""
def __init__(self,
kernel_size: Tuple[int, int], *,
proto_shape: np.ndarray = None,
**thresh_args):
# pylint: disable=line-too-long
"""Init.
:param thresh_args: thresholding arguments;
see :py:class:`~hybrid_learning.datasets.transforms.image_transforms.WithThresh`
"""
# pylint: enable=line-too-long
super().__init__(
trafo=BatchIntersectDecode2D(kernel_size=kernel_size,
proto_shape=proto_shape),
**thresh_args)