Source code for utils.plotter

"""Tool for displaying images, predictions and boxes"""

from typing import List, Optional, Union

import cv2
import numpy as np
import torch


class Plotter:
    """Tool for displaying images, predictions and boxes.

    `OpenCV <https://github.com/opencv/opencv>`_ is used for displaying.
    """

    def __init__(
        self,
        threshold: float = 0.8,
        labels: Optional[List[str]] = None,
        interval: int = 200,
        columns: int = 4,
    ):
        """
        :param threshold: Confidence threshold for displaying a box.
            Defaults to 0.8.
        :type threshold: float, optional
        :param labels: List of class names. Defaults to None.
        :type labels: Optional[List[str]], optional
        :param interval: Time interval between frames in milliseconds.
            Defaults to 200.
        :type interval: int, optional
        :param columns: Images are displayed in a grid. This parameter
            determines its width. Defaults to 4.
        :type columns: int, optional
        """
        # Predictions carry confidence scaled to 0..100, so the threshold
        # is stored on the same scale.
        self.threshold = int(threshold * 100)
        self.labels = labels
        self.colors = [
            (0, 0, 255),
            (0, 255, 0),
            (255, 0, 0),
            (255, 255, 0),
            (255, 0, 255),
            (0, 255, 255),
            (0, 128, 128),
        ]
        self.interval = interval
        self.columns = columns
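
    # A minimal usage sketch; the label names here are hypothetical and must
    # match the class ids produced by your model:
    #
    #     plotter = Plotter(threshold=0.5, labels=["car", "person"], columns=2)
    #
    # Note that ``threshold=0.5`` is compared against confidences on the
    # 0..100 scale, i.e. it is stored internally as 50.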

    def display(
        self,
        images: torch.Tensor,
        predictions: Optional[torch.Tensor],
        target: Optional[Union[List[torch.Tensor], torch.Tensor]],
    ):
        """Plays a video built from the input tensors.

        Press "q" to stop playback, "s" to save the video to ``log/out.avi``.

        :param images: Shape [ts, batch, channel, h, w]. Expects 2 channels.
        :type images: torch.Tensor
        :param predictions: Shape [ts, batch, anchor, 6]. One label contains
            (class, iou, xlu, ylu, xrd, yrd).
        :type predictions: Optional[torch.Tensor]
        :param target: Ground truth. The length of the list is equal to the
            batch size. Expects one of two formats: a tensor of shape
            [count_box, 6] where one label contains
            (ts, class id, xlu, ylu, xrd, yrd), or a tensor of shape
            [count_box, 5] where one label contains
            (class id, xlu, ylu, xrd, yrd).
        :type target: Optional[Union[List[torch.Tensor], torch.Tensor]]
        """
        ts, b, _, h, w = images.shape
        plt_images = images.permute(0, 1, 3, 4, 2)
        # Render the two channels on a grey background:
        # pixels active in channel 0 become black, in channel 1 white.
        grey_imgs = 127 * np.ones((ts, b, h, w), dtype=np.uint8)
        grey_imgs[plt_images[..., 0] > 0] = 0
        grey_imgs[plt_images[..., 1] > 0] = 255
        con_video = self._concatenate_video(grey_imgs).repeat(3, axis=-1)
        prep_target, prep_preds = None, None
        if target is not None:
            if isinstance(target, torch.Tensor):
                target = self._transform_targets(target, images.shape[0] - 1)
            prep_target = self._prepare_targets(target, h, w)
        if predictions is not None:
            prep_preds = self._prepare_preds(predictions, h, w)
        boxed_video = self._apply_boxes(con_video, prep_preds, prep_target)
        while self._show_video(boxed_video):
            cmd = cv2.waitKey()
            if cmd == ord("s"):
                self._save_video(boxed_video)
            if cmd == ord("q") or cmd == ord("s"):
                cv2.destroyWindow("Res")
                break
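
    # A hedged end-to-end sketch with random data of the documented shapes
    # (2 time steps, batch of 4, two-channel 64x64 frames, 8 anchors;
    # the single label name is an assumption for illustration):
    #
    #     images = (torch.rand(2, 4, 2, 64, 64) > 0.5).float()
    #     preds = torch.rand(2, 4, 8, 6)  # (class, iou, xlu, ylu, xrd, yrd)
    #     preds[..., 0] = 0.0             # one class id
    #     plotter = Plotter(threshold=0.5, labels=["object"])
    #     plotter.display(images, preds, target=None)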

    def _transform_targets(
        self, target: torch.Tensor, time_step: int
    ) -> List[torch.Tensor]:
        """Transforms targets from [batch_size, num_box, 5] with labels
        (class id, xlu, ylu, xrd, yrd) to List[torch.Tensor[num_box, 6]]
        with labels (ts, class id, xlu, ylu, xrd, yrd).
        """
        new_target = []
        for batch_idx in range(target.shape[0]):
            batch = target[batch_idx]
            # Boxes padded with a negative class id are dropped.
            batch = batch[batch[:, 0] >= 0]
            # Every box is assigned to the same (given) time step.
            time_tens = torch.ones((batch.shape[0], 1)) * time_step
            new_target.append(torch.concatenate((time_tens, batch), dim=1))
        return new_target

    def _concatenate_video(self, video: np.ndarray) -> np.ndarray:
        """Combines a batch of videos into one.

        :param video: Shape [ts, b, h, w]
        :type video: np.ndarray
        :return: Combined video. Shape [ts, h, w, 1]
        :rtype: np.ndarray
        """
        b = video.shape[1]
        # Pad the batch with black frames so it fills the last grid row.
        video = np.pad(
            video,
            pad_width=(
                (0, 0),
                (0, (-b) % self.columns),
                (0, 0),
                (0, 0),
            ),
            constant_values=0,
        )
        con_imgs = []
        for time_stamp in video:
            arr = [
                np.concatenate(time_stamp[idx : idx + self.columns], axis=1)
                for idx in range(0, b, self.columns)
            ]
            con_imgs.append(np.concatenate(arr, axis=0))
        return np.stack(con_imgs)[..., None]

    def _prepare_preds(
        self, predictions: torch.Tensor, height: int, width: int
    ) -> torch.Tensor:
        """Changes the coordinates of the boxes according to the position
        of the image in the batch grid.

        :param predictions: Shape [ts, batch, anchors, 6]. One label contains
            (class, iou, xlu, ylu, xrd, yrd).
        :type predictions: torch.Tensor
        :param height: Image height
        :type height: int
        :param width: Image width
        :type width: int
        :return: Shape [ts, count_boxes, 6]. One label contains
            (class, iou, xlu, ylu, xrd, yrd).
        :rtype: torch.Tensor
        """
        for batch_idx in range(predictions.shape[1]):
            # Shift normalized coordinates into the grid cell that holds
            # this batch element.
            predictions[:, batch_idx, :, [2, 4]] = (
                torch.clamp(predictions[:, batch_idx, :, [2, 4]], min=0.0, max=1.0)
                * width
                + (batch_idx % self.columns) * width
            )
            predictions[:, batch_idx, :, [3, 5]] = (
                torch.clamp(predictions[:, batch_idx, :, [3, 5]], min=0.0, max=1.0)
                * height
                + (batch_idx // self.columns) * height
            )
        # Confidence is kept as an integer percentage.
        predictions[..., 1] *= 100
        return torch.flatten(predictions, start_dim=1, end_dim=2).type(torch.int32)

    def _prepare_targets(
        self, target: List[torch.Tensor], height: int, width: int
    ) -> torch.Tensor:
        """Changes the coordinates of the boxes according to the position
        of the image in the batch grid.

        :param target: The length of the list is equal to the batch size.
            Each tensor has shape [count_box, 6]; one label contains
            (ts, class id, xlu, ylu, xrd, yrd).
        :type target: List[torch.Tensor]
        :param height: Image height
        :type height: int
        :param width: Image width
        :type width: int
        :return: Shape [count_boxes, 6]. One label contains
            (ts, class id, xlu, ylu, xrd, yrd).
        :rtype: torch.Tensor
        """
        for batch_idx, t_batch in enumerate(target):
            t_batch[:, [2, 4]] = (
                torch.clamp(t_batch[:, [2, 4]], min=0.0, max=1.0) * width
                + (batch_idx % self.columns) * width
            )
            t_batch[:, [3, 5]] = (
                torch.clamp(t_batch[:, [3, 5]], min=0.0, max=1.0) * height
                + (batch_idx // self.columns) * height
            )
        return torch.concatenate(target, dim=0).type(torch.int32)

    def _show_video(self, video: np.ndarray) -> bool:
        """Plays the video.

        :param video: Shape [ts, h, w, channels]
        :type video: np.ndarray
        :return: ``False`` if "q" is pressed, otherwise ``True``
        :rtype: bool
        """
        for img in video:
            cv2.imshow("Res", img)
            if cv2.waitKey(self.interval) == ord("q"):
                cv2.destroyWindow("Res")
                return False
        return True

    def _save_video(self, video: np.ndarray) -> None:
        """Saves the video to ``log/out.avi``. Note that the ``log``
        directory must already exist; OpenCV does not create it.
        """
        _, h, w, _ = video.shape
        out = cv2.VideoWriter(
            "log/out.avi", cv2.VideoWriter_fourcc(*"XVID"), 25, (w, h)
        )
        for img in video:
            out.write(img)
        # Hold the last frame for a few extra seconds.
        for _ in range(60):
            out.write(img)
        out.release()
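
    # Worked example of the grid offset used by _prepare_preds and
    # _prepare_targets: with columns=4, batch element 5 lands in grid cell
    # (row 1, column 1), so a normalized corner (0.25, 0.5) becomes
    #
    #     x = 0.25 * w + (5 % 4) * w  = 1.25 * w
    #     y = 0.50 * h + (5 // 4) * h = 1.50 * h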

    def _apply_boxes(
        self,
        video: np.ndarray,
        preds: Optional[torch.Tensor],
        target: Optional[torch.Tensor],
    ) -> np.ndarray:
        """Adds boxes to frames.

        :param video: Shape [ts, h, w, channel]
        :type video: np.ndarray
        :param preds: Shape [ts, count_box, 6]. One label contains
            (class, iou, xlu, ylu, xrd, yrd).
        :type preds: Optional[torch.Tensor]
        :param target: Shape [count_box, 6]. One label contains
            (ts, class id, xlu, ylu, xrd, yrd).
        :type target: Optional[torch.Tensor]
        :return: Video with boxes
        :rtype: np.ndarray
        """
        if (target is None) and (preds is None):
            return video
        boxed_video = np.empty_like(video, dtype=video.dtype)
        for ts, img in enumerate(video):
            if target is not None:
                # Only the boxes assigned to this time step are drawn.
                self._draw_target_boxes(img, target[target[:, 0] == ts])
            if preds is not None:
                self._draw_preds_box(img, preds[ts])
            boxed_video[ts] = img
        return boxed_video

    def _draw_preds_box(self, image: np.ndarray, preds: torch.Tensor) -> None:
        """Draws bounding boxes for predictions.

        :param image: Shape [h, w, channel]
        :type image: np.ndarray
        :param preds: Shape [count_box, 6]. One label contains
            (class, iou, xlu, ylu, xrd, yrd).
        :type preds: torch.Tensor
        """
        # Keep valid boxes whose confidence clears the threshold.
        mask = (preds[:, 0] >= 0) & (preds[:, 1] >= self.threshold)
        for box in preds[mask]:
            start_point = (box[2].item(), box[3].item())
            end_point = (box[4].item(), box[5].item())
            cv2.rectangle(
                image,
                start_point,
                end_point,
                color=self.colors[box[0] % len(self.colors)],
                thickness=1,
                lineType=cv2.LINE_AA,
            )
            cv2.putText(
                image,
                text="%.2f %s" % (box[1].item() / 100, self.labels[box[0]]),
                org=(box[2].item(), box[3].item() - 4),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.4,
                thickness=1,
                color=(255, 0, 0),
                lineType=cv2.LINE_AA,
            )

    def _draw_target_boxes(self, image: np.ndarray, target: torch.Tensor) -> None:
        """Draws bounding boxes for targets.

        :param image: Image for drawing
        :type image: np.ndarray
        :param target: Shape [count_box, 6]. One label contains
            (ts, class id, xlu, ylu, xrd, yrd).
        :type target: torch.Tensor
        """
        for box in target:
            start_point = (box[2].item(), box[3].item())
            end_point = (box[4].item(), box[5].item())
            # Targets are drawn in a darker shade of the class color.
            cv2.rectangle(
                image,
                start_point,
                end_point,
                color=[c / 2 for c in self.colors[box[1] % len(self.colors)]],
                thickness=2,
                lineType=cv2.LINE_AA,
            )
            cv2.putText(
                image,
                text=self.labels[box[1]],
                org=(box[2].item(), box[3].item() - 4),
                fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=0.4,
                thickness=1,
                color=(0, 60, 0),
                lineType=cv2.LINE_AA,
            )
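
    # Box format reference, a hedged sketch of the tensors the drawing
    # helpers consume (coordinates are already in mosaic pixels here):
    #
    #     pred_box   = torch.tensor([0, 93, 10, 12, 40, 48])  # class, conf%, xlu, ylu, xrd, yrd
    #     target_box = torch.tensor([3, 0, 10, 12, 40, 48])   # ts, class id, xlu, ylu, xrd, yrd
    #
    # A prediction is drawn only if its conf% is at least int(threshold * 100);
    # a target is drawn only on its frame ``ts``.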