Skip to content

vision_utils

RandomScale

Rescale the input PIL.Image so that its smaller edge matches a randomly sampled size. On each call an integer size is drawn uniformly from [minsize, maxsize] (both inclusive) and the image is rescaled, preserving its aspect ratio, so that its smaller edge equals that size; i.e., if height > width, the image is rescaled to width = size and height = size * height / width. Args: minsize (int): Minimum target size for the smaller edge of the image. maxsize (int): Maximum target size for the smaller edge of the image. interpolation (int, optional): Desired interpolation. Default is PIL.Image.BILINEAR

Source code in omnigibson/utils/vision_utils.py
class RandomScale:
    """Rescale the input PIL.Image so that its smaller edge matches a randomly sampled size.

    On every call an integer target size is drawn uniformly from
    ``[minsize, maxsize]`` (both ends inclusive). The image is then resized,
    preserving its aspect ratio, so that its smaller edge equals that size;
    i.e, if height > width, the image is rescaled to
    width = size, height = size * height / width.

    Args:
        minsize (int): Minimum target size for the smaller edge of the image.
        maxsize (int): Maximum target size for the smaller edge of the image.
        interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``
    """

    def __init__(self, minsize, maxsize, interpolation=Image.BILINEAR):
        # Only integer bounds are supported; sequence sizes are rejected here.
        assert isinstance(minsize, int)
        assert isinstance(maxsize, int)
        self.minsize = minsize
        self.maxsize = maxsize
        self.interpolation = interpolation

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be scaled.

        Returns:
            PIL.Image: Rescaled image. The input object itself is returned when its
                smaller edge already equals the sampled size.
        """
        # randint's upper bound is exclusive, hence the +1. Cast to a plain int so the
        # rest of the method never depends on which scalar type NumPy hands back.
        size = int(np.random.randint(self.minsize, self.maxsize + 1))

        w, h = img.size
        if min(w, h) == size:
            # Smaller edge already matches: nothing to do
            return img

        if w < h:
            ow, oh = size, int(size * h / w)
        else:
            ow, oh = int(size * w / h), size
        return img.resize((ow, oh), self.interpolation)

__call__(img)

Parameters:

Name Type Description Default
img Image

Image to be scaled.

required

Returns:

Type Description

PIL.Image: Rescaled image.

Source code in omnigibson/utils/vision_utils.py
def __call__(self, img):
    """
    Args:
        img (PIL.Image): Image to be scaled.

    Returns:
        PIL.Image: Rescaled image. The input object itself is returned when its
            smaller edge already equals the sampled size.
    """
    # randint's upper bound is exclusive, hence the +1. Cast to a plain int so the
    # rest of the method never depends on which scalar type NumPy hands back.
    size = int(np.random.randint(self.minsize, self.maxsize + 1))

    w, h = img.size
    if min(w, h) == size:
        # Smaller edge already matches: nothing to do
        return img

    if w < h:
        ow, oh = size, int(size * h / w)
    else:
        ow, oh = int(size * w / h), size
    return img.resize((ow, oh), self.interpolation)

Remapper

Remaps values in an image from old_mapping to new_mapping using an efficient key_array. See more details in the remap method.

Source code in omnigibson/utils/vision_utils.py
class Remapper:
    """
    Remaps values in an image from old_mapping to new_mapping using an efficient key_array.
    See more details in the remap method.

    The key_array is a lookup table indexed by old image value whose entries are the new values.
    It persists across calls to remap, and known_ids records which old ids have already been
    written into it so that they are not looked up again. Call clear() to reset both.
    """
    def __init__(self):
        self.key_array = np.array([], dtype=np.uint32)  # Initialize the key_array as empty
        self.known_ids = set()

    def clear(self):
        """Resets the key_array to empty."""
        self.key_array = np.array([], dtype=np.uint32)
        self.known_ids = set()

    def remap(self, old_mapping, new_mapping, image):
        """
        Remaps values in the given image from old_mapping to new_mapping using an efficient key_array.
        If the image contains values that are not in old_mapping, they are remapped to the value in new_mapping
        that corresponds to 'unlabelled'.

        NOTE: ids from old_mapping are cached in known_ids once populated; if the label attached to an
        already-known id changes between calls, call clear() first.

        Args:
            old_mapping (dict): The old mapping dictionary that maps a set of image values to labels
                e.g. {1: 'desk', 2: 'chair'}.
            new_mapping (dict): The new mapping dictionary that maps another set of image values to labels,
                e.g. {5: 'desk', 7: 'chair', 100: 'unlabelled'}.
            image (np.ndarray): The 2D image to remap, e.g. [[1, 3], [1, 2]].

        Returns:
            np.ndarray: The remapped image, e.g. [[5,100],[5,7]].
            dict: The remapped labels dictionary, e.g. {5: 'desk', 7: 'chair', 100: 'unlabelled'}.

        Raises:
            AssertionError: If new_mapping uses max uint32 as a key, if a label from old_mapping (or
                'unlabelled', when needed) has no key in new_mapping, or if some image value is left unmapped.
        """
        # Max uint32 is the sentinel for "not mapped yet"
        unmapped = np.iinfo(np.uint32).max
        # Make sure that max uint32 doesn't match any value in the new mapping
        assert np.all(np.array(list(new_mapping.keys())) != unmapped), "New mapping contains default unmapped value!"

        # The key array is indexed both by image values and by old_mapping keys, so it has to be large
        # enough for the largest of either (old_mapping may contain ids that are not visible in the image).
        max_key = int(max(int(np.max(image)), max(old_mapping.keys(), default=-1)))
        if max_key >= len(self.key_array):
            # We build a new key array and use max uint32 as the default value.
            grown_key_array = np.full(max_key + 1, unmapped, dtype=np.uint32)
            # Copy the previous key array into the new key array
            grown_key_array[:len(self.key_array)] = self.key_array
            self.key_array = grown_key_array

        # Populate key_array with the keys we have not seen before
        for key in old_mapping.keys() - self.known_ids:
            label = old_mapping[key]
            new_key = next((k for k, v in new_mapping.items() if v == label), None)
            assert new_key is not None, f"Could not find a new key for label {label} in new_mapping!"
            self.key_array[key] = new_key
            # Only mark the id as known once it is actually written, so a failed lookup can be retried
            self.known_ids.add(key)

        # For all the values that exist in the image but not in old_mapping.keys(), we map them to whichever key in
        # new_mapping that equals to 'unlabelled'. This is needed because some values in the image don't necessarily
        # show up in the old_mapping, i.e. particle systems.
        unlabelled_key = None
        for key in np.unique(image):
            key = int(key)
            if key in old_mapping:
                continue
            if unlabelled_key is None:
                # Looked up lazily (and only once) because it is only required when such a value exists
                unlabelled_key = next((k for k, v in new_mapping.items() if v == 'unlabelled'), None)
                assert unlabelled_key is not None, "Could not find a new key for label 'unlabelled' in new_mapping!"
            self.key_array[key] = unlabelled_key
            # The cached entry (if any) was just overwritten: forget the id so that it gets
            # re-populated if it shows up in old_mapping again in a later call
            self.known_ids.discard(key)

        # Apply remapping
        remapped_img = self.key_array[image]
        # Make sure all values are correctly remapped and not equal to the default value
        assert np.all(remapped_img != unmapped), "Not all keys in the image are in the key array!"
        remapped_labels = {key: new_mapping[key] for key in np.unique(remapped_img)}

        return remapped_img, remapped_labels

    def remap_bbox(self, semantic_id):
        """
        Remaps a semantic id to a new id using the key_array.

        The id must already be covered by the key_array, i.e. remap must have been called
        with an image or old_mapping containing a value at least as large.

        Args:
            semantic_id (int): The semantic id to remap.
        Returns:
            int: The remapped id.
        """
        assert semantic_id < len(self.key_array), f"Semantic id {semantic_id} is out of range!"
        return self.key_array[semantic_id]

clear()

Resets the key_array to empty.

Source code in omnigibson/utils/vision_utils.py
def clear(self):
    """Discard all cached state: forget every known id and reset the key_array to empty."""
    self.known_ids = set()
    # Zero-length uint32 lookup table, same as a freshly constructed instance
    self.key_array = np.empty(0, dtype=np.uint32)

remap(old_mapping, new_mapping, image)

Remaps values in the given image from old_mapping to new_mapping using an efficient key_array. If the image contains values that are not in old_mapping, they are remapped to the value in new_mapping that corresponds to 'unlabelled'.

Parameters:

Name Type Description Default
old_mapping dict

The old mapping dictionary that maps a set of image values to labels e.g. {1: 'desk', 2: 'chair'}.

required
new_mapping dict

The new mapping dictionary that maps another set of image values to labels, e.g. {5: 'desk', 7: 'chair', 100: 'unlabelled'}.

required
image ndarray

The 2D image to remap, e.g. [[1, 3], [1, 2]].

required

Returns:

Name Type Description

np.ndarray: The remapped image, e.g. [[5,100],[5,7]].

dict

The remapped labels dictionary, e.g. {5: 'desk', 7: 'chair', 100: 'unlabelled'}.

Source code in omnigibson/utils/vision_utils.py
def remap(self, old_mapping, new_mapping, image):
    """
    Remaps values in the given image from old_mapping to new_mapping using an efficient key_array.
    If the image contains values that are not in old_mapping, they are remapped to the value in new_mapping
    that corresponds to 'unlabelled'.

    NOTE: ids from old_mapping are cached in known_ids once populated; if the label attached to an
    already-known id changes between calls, call clear() first.

    Args:
        old_mapping (dict): The old mapping dictionary that maps a set of image values to labels
            e.g. {1: 'desk', 2: 'chair'}.
        new_mapping (dict): The new mapping dictionary that maps another set of image values to labels,
            e.g. {5: 'desk', 7: 'chair', 100: 'unlabelled'}.
        image (np.ndarray): The 2D image to remap, e.g. [[1, 3], [1, 2]].

    Returns:
        np.ndarray: The remapped image, e.g. [[5,100],[5,7]].
        dict: The remapped labels dictionary, e.g. {5: 'desk', 7: 'chair', 100: 'unlabelled'}.

    Raises:
        AssertionError: If new_mapping uses max uint32 as a key, if a label from old_mapping (or
            'unlabelled', when needed) has no key in new_mapping, or if some image value is left unmapped.
    """
    # Max uint32 is the sentinel for "not mapped yet"
    unmapped = np.iinfo(np.uint32).max
    # Make sure that max uint32 doesn't match any value in the new mapping
    assert np.all(np.array(list(new_mapping.keys())) != unmapped), "New mapping contains default unmapped value!"

    # The key array is indexed both by image values and by old_mapping keys, so it has to be large
    # enough for the largest of either (old_mapping may contain ids that are not visible in the image).
    max_key = int(max(int(np.max(image)), max(old_mapping.keys(), default=-1)))
    if max_key >= len(self.key_array):
        # We build a new key array and use max uint32 as the default value.
        grown_key_array = np.full(max_key + 1, unmapped, dtype=np.uint32)
        # Copy the previous key array into the new key array
        grown_key_array[:len(self.key_array)] = self.key_array
        self.key_array = grown_key_array

    # Populate key_array with the keys we have not seen before
    for key in old_mapping.keys() - self.known_ids:
        label = old_mapping[key]
        new_key = next((k for k, v in new_mapping.items() if v == label), None)
        assert new_key is not None, f"Could not find a new key for label {label} in new_mapping!"
        self.key_array[key] = new_key
        # Only mark the id as known once it is actually written, so a failed lookup can be retried
        self.known_ids.add(key)

    # For all the values that exist in the image but not in old_mapping.keys(), we map them to whichever key in
    # new_mapping that equals to 'unlabelled'. This is needed because some values in the image don't necessarily
    # show up in the old_mapping, i.e. particle systems.
    unlabelled_key = None
    for key in np.unique(image):
        key = int(key)
        if key in old_mapping:
            continue
        if unlabelled_key is None:
            # Looked up lazily (and only once) because it is only required when such a value exists
            unlabelled_key = next((k for k, v in new_mapping.items() if v == 'unlabelled'), None)
            assert unlabelled_key is not None, "Could not find a new key for label 'unlabelled' in new_mapping!"
        self.key_array[key] = unlabelled_key
        # The cached entry (if any) was just overwritten: forget the id so that it gets
        # re-populated if it shows up in old_mapping again in a later call
        self.known_ids.discard(key)

    # Apply remapping
    remapped_img = self.key_array[image]
    # Make sure all values are correctly remapped and not equal to the default value
    assert np.all(remapped_img != unmapped), "Not all keys in the image are in the key array!"
    remapped_labels = {key: new_mapping[key] for key in np.unique(remapped_img)}

    return remapped_img, remapped_labels

remap_bbox(semantic_id)

Remaps a semantic id to a new id using the key_array. Args: semantic_id (int): The semantic id to remap. Returns: int: The remapped id.

Source code in omnigibson/utils/vision_utils.py
def remap_bbox(self, semantic_id):
    """
    Look up the new id that the key_array currently stores for a single semantic id.

    Args:
        semantic_id (int): The semantic id to remap.
    Returns:
        int: The remapped id.
    """
    lookup_table = self.key_array
    # Ids beyond the end of the table have never been seen by remap
    assert semantic_id < len(lookup_table), f"Semantic id {semantic_id} is out of range!"
    return lookup_table[semantic_id]

colorize_bboxes_3d(bbox_3d_data, rgb_image, camera_params)

Project 3D bounding box data onto 2D and colorize the bounding boxes for visualization. Reference: https://forums.developer.nvidia.com/t/mathematical-definition-of-3d-bounding-boxes-annotator-nvidia-omniverse-isaac-sim/223416

Parameters:

Name Type Description Default
bbox_3d_data ndarray

3D bounding box data

required
rgb_image ndarray

RGB image

required
camera_params dict

Camera parameters

required

Returns:

Type Description

np.ndarray: RGB image with 3D bounding boxes drawn

Source code in omnigibson/utils/vision_utils.py
def colorize_bboxes_3d(bbox_3d_data, rgb_image, camera_params):
    """
    Overlay the wireframes of 3D bounding boxes on an RGB image for visualization.
    The box corners are projected from world space into the image and the 12 edges of
    every box are drawn, one color per box.
    Reference: https://forums.developer.nvidia.com/t/mathematical-definition-of-3d-bounding-boxes-annotator-nvidia-omniverse-isaac-sim/223416

    Args:
        bbox_3d_data (np.ndarray): 3D bounding box data
        rgb_image (np.ndarray): RGB image
        camera_params (dict): Camera parameters; the "cameraProjection" and "cameraViewTransform"
            entries are read and each reshaped to a 4x4 matrix

    Returns:
        np.ndarray: RGB image with 3D bounding boxes drawn
    """

    def world_to_image_pinhole(world_points, camera_params):
        # Map world-space points into [0, 1] image coordinates (assumes pinhole camera model)
        projection = camera_params["cameraProjection"].reshape(4, 4)
        view = camera_params["cameraViewTransform"].reshape(4, 4)
        world_to_clip = np.dot(view, projection)
        # Append a homogeneous coordinate of 1 to every point
        homogeneous = np.pad(world_points, ((0, 0), (0, 1)), constant_values=1.0)
        clip_points = np.dot(homogeneous, world_to_clip)
        # Perspective divide by the last (w) component
        normalized = clip_points / (clip_points[..., -1:])
        return 0.5 * (normalized[..., :2] + 1)

    def draw_lines_and_points_for_boxes(img, all_image_points):
        _, img_height = img.size
        canvas = ImageDraw.Draw(img)

        # Pairs of corner indices that make up the 12 edges of a box
        front_face = [(0, 1), (1, 3), (3, 2), (2, 0)]
        back_face = [(4, 5), (5, 7), (7, 6), (6, 4)]
        side_edges = [(0, 4), (1, 5), (2, 6), (3, 7)]  # connect front and back faces
        edges = front_face + back_face + side_edges

        # Every box contributes 8 consecutive corner points
        corners_per_box = 8
        box_count = len(all_image_points) // corners_per_box

        # One random color per box, converted to integer tuples for drawing
        from omni.replicator.core import random_colours
        palette = random_colours(box_count, enable_random=True, num_channels=3)
        palette = [(int(r), int(g), int(b)) for r, g, b in palette]

        for box_index, offset in enumerate(range(0, len(all_image_points), corners_per_box)):
            box_points = all_image_points[offset:offset + corners_per_box]
            # Flip Y-axis to match image coordinates (done in place on the caller's array)
            box_points[:, 1] = img_height - box_points[:, 1]

            edge_color = palette[box_index]
            for first, second in edges:
                canvas.line(
                    (box_points[first][0], box_points[first][1],
                     box_points[second][0], box_points[second][1]),
                    fill=edge_color,
                    width=2,
                )

    pil_image = Image.fromarray(rgb_image)

    # Get 3D corners, flattened to one (x, y, z) row per corner
    from omni.syntheticdata.scripts.helpers import get_bbox_3d_corners
    corners_world = get_bbox_3d_corners(bbox_3d_data).reshape(-1, 3)

    # Project to image space and scale from [0, 1] to pixel units
    corners_pixels = world_to_image_pinhole(corners_world, camera_params)
    img_width, img_height = pil_image.size
    corners_pixels *= np.array([[img_width, img_height]])

    # Now, draw all bounding boxes
    draw_lines_and_points_for_boxes(pil_image, corners_pixels)

    return np.array(pil_image)

randomize_colors(N, bright=True)

Modified from https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/visualize.py#L59 Generate random colors. To get visually distinct colors, generate them in HSV space then convert to RGB.

Parameters:

Name Type Description Default
N int

Number of colors to generate

required
bright bool

whether to increase the brightness of the colors or not

True

Returns:

Type Description

np.ndarray: Array of N RGB colors; the first color is always black

Source code in omnigibson/utils/vision_utils.py
def randomize_colors(N, bright=True):
    """
    Modified from https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/visualize.py#L59
    Generate random colors.
    To get visually distinct colors, generate them in HSV space then
    convert to RGB.

    The colors are shuffled with a fixed seed, so the same N and bright always
    produce the same array and the global NumPy random state is left untouched.

    Args:
        N (int): Number of colors to generate
        bright (bool): whether to increase the brightness of the colors or not

    Returns:
        np.ndarray: (N, 3) array of RGB colors with components in [0, 1]. The first
            color is always black. An empty (0, 3) array is returned when N <= 0.
    """
    if N <= 0:
        # Nothing to generate (and no first row to turn black)
        return np.zeros((0, 3))

    brightness = 1.0 if bright else 0.5
    # Evenly spaced hues at full saturation
    hsv = [(i / N, 1, brightness) for i in range(N)]
    colors = np.array([colorsys.hsv_to_rgb(*c) for c in hsv])
    # Shuffle with the dedicated seeded generator rather than the global one
    rstate = np.random.RandomState(seed=20)
    rstate.shuffle(colors)
    colors[0] = [0, 0, 0]  # First color is black
    return colors

segmentation_to_rgb(seg_im, N, colors=None)

Helper function to visualize segmentations as RGB frames. NOTE: assumes that geom IDs go up to N at most - if not, multiple geoms might be assigned to the same color.

Parameters:

Name Type Description Default
seg_im (W, H)-array

Segmentation image

required
N int

Maximum segmentation ID from @seg_im

required
colors None or list of 3-array

If specified, colors to apply to different segmentation IDs. Otherwise, will be generated randomly

None
Source code in omnigibson/utils/vision_utils.py
def segmentation_to_rgb(seg_im, N, colors=None):
    """
    Helper function to visualize segmentations as RGB frames.
    NOTE: segmentation IDs are taken modulo N before the color lookup, so IDs
    greater than or equal to N share a color with a lower ID.

    Args:
        seg_im ((W, H)-array): Segmentation image
        N (int): Number of distinct colors to use; IDs from @seg_im are wrapped into [0, N - 1]
        colors (None or list of 3-array): If specified, colors to apply
            to different segmentation IDs. Otherwise, will be generated randomly

    Returns:
        np.ndarray: (W, H, 3) color image. uint8 (color values scaled by 255) if N <= 256,
            otherwise the unscaled color values as floats.
    """
    # ensure all values lie within [0, N - 1] so that they are valid rows of the color table
    seg_im = np.mod(seg_im, N)

    if colors is None:
        use_colors = randomize_colors(N=N, bright=True)
    else:
        # Accept plain lists too: fancy indexing below needs an array
        use_colors = np.asarray(colors)

    if N <= 256:
        return (255.0 * use_colors[seg_im]).astype(np.uint8)
    else:
        # builtin float (float64) replaces the np.float alias removed from NumPy
        return (use_colors[seg_im]).astype(float)