vllm.multimodal.image ¶

convert_image_mode ¶

convert_image_mode(image: Image, to_mode: str)

Source code in vllm/multimodal/image.py

def convert_image_mode(image: Image.Image, to_mode: str):
    """Return `image` expressed in the colour mode `to_mode`.

    Args:
        image: The image to convert.
        to_mode: Target mode string, compared against `image.mode`.

    Returns:
        `image` itself when it is already in `to_mode`; otherwise a
        converted image. RGBA -> RGB is delegated to `rgba_to_rgb`
        instead of `Image.convert`; every other conversion uses
        `image.convert(to_mode)`.
    """
    source_mode = image.mode

    # Nothing to do: hand back the same object, not a copy.
    if source_mode == to_mode:
        return image

    # Special-cased so the alpha channel is handled by rgba_to_rgb.
    if source_mode == "RGBA" and to_mode == "RGB":
        return rgba_to_rgb(image)

    return image.convert(to_mode)

rescale_image_size ¶

rescale_image_size(
    image: Image, size_factor: float, transpose: int = -1
) -> Image

Rescale the dimensions of an image by a constant factor.

Source code in vllm/multimodal/image.py

def rescale_image_size(
    image: Image.Image, size_factor: float, transpose: int = -1
) -> Image.Image:
    """Rescale the dimensions of an image by a constant factor.

    Args:
        image: The image to resize.
        size_factor: Multiplier applied to both width and height; each
            product is truncated to an integer with `int()`.
        transpose: When non-negative, the value is turned into an
            `Image.Transpose` member and applied after resizing. The
            default of -1 (or any negative value) skips this step.

    Returns:
        The resized (and optionally transposed) image.
    """
    target_size = (
        int(image.width * size_factor),
        int(image.height * size_factor),
    )
    resized = image.resize(target_size)

    if transpose >= 0:
        return resized.transpose(Image.Transpose(transpose))
    return resized

rgba_to_rgb ¶

rgba_to_rgb(
    image: Image,
    background_color: tuple[int, int, int] | list[int] = (
        255,
        255,
        255,
    ),
) -> Image

Convert an RGBA image to RGB with filled background color.

Source code in vllm/multimodal/image.py

def rgba_to_rgb(
    image: Image.Image,
    background_color: tuple[int, int, int] | list[int] = (255, 255, 255),
) -> Image.Image:
    """Convert an RGBA image to RGB with filled background color.

    Args:
        image: Source image; its mode must be "RGBA".
        background_color: Colour used to fill the new RGB canvas, given
            as a tuple or list of ints. Defaults to (255, 255, 255).

    Returns:
        A new "RGB" image with the same size as `image`, consisting of
        the background colour with `image` pasted on top using its own
        alpha channel as the paste mask.

    Raises:
        AssertionError: If `image.mode` is not "RGBA". This check is an
            `assert`, so it is skipped when Python runs with `-O`.
    """
    assert image.mode == "RGBA"

    # The signature allows a list, but Pillow documents the fill colour for
    # multi-band modes as a tuple; normalise so both spellings behave alike.
    fill_color = tuple(background_color)

    converted = Image.new("RGB", image.size, fill_color)
    converted.paste(image, mask=image.split()[3])  # 3 is the alpha channel
    return converted