Skip to content

Training

augmentation

AddGlassesAugmentation

Draws glasses on a PIL image, based on face landmarks.

Reference: https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

Uses face detector and shape predictor (model for extracting face landmarks) from dlib. The glasses are composed of 3 line segments and 2 ellipses, inside the ellipses a Gaussian blur is applied.

Example usage:

from src.training.augmentation import AddGlassesAugmentation
from PIL import Image

img = Image.open("image.jpg")
add_glasses = AddGlassesAugmentation.with_default_models()
img_with_glasses = add_glasses(img)

Source code in src/training/augmentation/glasses.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class AddGlassesAugmentation:
    """Draws glasses on a PIL image, based on face landmarks.

    Reference: https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

    Uses face detector and shape predictor (model for extracting face landmarks) from dlib.
    The glasses are composed of 3 line segments and 2 ellipses, inside the ellipses a Gaussian blur is applied.

    Example usage:
    >>> from src.training.augmentation import AddGlassesAugmentation
    >>> from PIL import Image
    >>> img = Image.open("image.jpg")
    >>> add_glasses = AddGlassesAugmentation.with_default_models()
    >>> img_with_glasses = add_glasses(img)
    """

    def __init__(
        self,
        frontal_face_detector,
        shape_predictor,
        width_scale=2.0,
        height_scale=4.0,
        blur_radius=1,
        line_width=3,
        fill_color="black",
    ):
        self.frontal_face_detector = frontal_face_detector
        self.shape_predictor = shape_predictor
        self.width_scale = width_scale
        self.height_scale = height_scale
        self.blur_radius = blur_radius
        self.line_width = line_width
        self.fill_color = fill_color

    @classmethod
    def with_default_models(
        cls,
        width_scale=2.0,
        height_scale=4.0,
        blur_radius=1,
        line_width=3,
        fill_color="black",
    ) -> Self:
        """Load default models and create an instance of AddGlassesAugmentation."""
        frontal_face_detector = get_dlib_frontal_face_detector()
        shape_predictor = get_dlib_shape_predictor()

        return cls(
            frontal_face_detector=frontal_face_detector,
            shape_predictor=shape_predictor,
            width_scale=width_scale,
            height_scale=height_scale,
            blur_radius=blur_radius,
            line_width=line_width,
            fill_color=fill_color,
        )

    def __call__(self, img: Image.Image) -> Image.Image:
        img_numpy = np.array(img)

        try:
            # This seems to fail if face is already centered
            face = self.frontal_face_detector(img_numpy)[0]
        except IndexError:
            print(
                f"{self.__class__.__name__}: No face detected, using entire image to detect landmarks."
            )
            # Proceed as if the face is centered
            face = dlib.rectangle(0, 0, img_numpy.shape[1], img_numpy.shape[0])  # type: ignore

        landmarks = self.shape_predictor(img_numpy, face)

        left_eye_bbox = self._left_glass_bounding_box(landmarks)
        right_eye_bbox = self._right_glass_bounding_box(landmarks)

        img = self._apply_blur_to_ellipse(img, left_eye_bbox)
        img = self._apply_blur_to_ellipse(img, right_eye_bbox)

        draw = ImageDraw.Draw(img)
        draw.ellipse(left_eye_bbox, outline=self.fill_color, width=self.line_width)
        draw.ellipse(right_eye_bbox, outline=self.fill_color, width=self.line_width)
        draw.line(
            self._middle_part(landmarks), fill=self.fill_color, width=self.line_width
        )
        draw.line(
            self._left_part(landmarks), fill=self.fill_color, width=self.line_width
        )
        draw.line(
            self._right_part(landmarks), fill=self.fill_color, width=self.line_width
        )

        return img

    def _left_glass_bounding_box(self, landmarks):
        left_eye_width = (
            abs(landmarks.part(39).x - landmarks.part(36).x) * self.width_scale
        )
        left_eye_height = (
            abs(landmarks.part(37).y - landmarks.part(41).y) * self.height_scale
        )
        left_eye_center_x = (landmarks.part(36).x + landmarks.part(39).x) / 2
        left_eye_center_y = (landmarks.part(36).y + landmarks.part(39).y) / 2

        top_left = (
            left_eye_center_x - left_eye_width / 2,
            left_eye_center_y - left_eye_height / 2,
        )
        bottom_right = (
            left_eye_center_x + left_eye_width / 2,
            left_eye_center_y + left_eye_height / 2,
        )
        return top_left, bottom_right

    def _right_glass_bounding_box(self, landmarks):
        right_eye_width = (
            abs(landmarks.part(45).x - landmarks.part(42).x) * self.width_scale
        )
        right_eye_height = (
            abs(landmarks.part(43).y - landmarks.part(47).y) * self.height_scale
        )
        right_eye_center_x = (landmarks.part(42).x + landmarks.part(45).x) / 2
        right_eye_center_y = (landmarks.part(42).y + landmarks.part(45).y) / 2

        top_left = (
            right_eye_center_x - right_eye_width / 2,
            right_eye_center_y - right_eye_height / 2,
        )
        bottom_right = (
            right_eye_center_x + right_eye_width / 2,
            right_eye_center_y + right_eye_height / 2,
        )
        return top_left, bottom_right

    def _middle_part(self, landmarks):
        left_bbox = self._left_glass_bounding_box(landmarks)
        right_bbox = self._right_glass_bounding_box(landmarks)
        left = (
            left_bbox[1][0],
            (left_bbox[0][1] + left_bbox[1][1]) / 2,
        )
        right = (
            right_bbox[0][0],
            (right_bbox[0][1] + right_bbox[1][1]) / 2,
        )
        return left, right

    def _left_part(self, landmarks):
        left_bbox = self._left_glass_bounding_box(landmarks)
        left = (landmarks.part(0).x, landmarks.part(0).y)
        right = (left_bbox[0][0], (left_bbox[0][1] + left_bbox[1][1]) / 2)
        return left, right

    def _right_part(self, landmarks):
        right_bbox = self._right_glass_bounding_box(landmarks)
        left = (right_bbox[1][0], (right_bbox[0][1] + right_bbox[1][1]) / 2)
        right = (landmarks.part(16).x, landmarks.part(16).y)
        return left, right

    def _apply_blur_to_ellipse(self, img, ellipse_bbox):
        """Apply Gaussian blur inside an ellipse region"""
        mask = Image.new("L", img.size, 0)  # Black mask
        mask_draw = ImageDraw.Draw(mask)
        mask_draw.ellipse(ellipse_bbox, fill=255)  # White ellipse

        blurred_img = img.filter(ImageFilter.GaussianBlur(radius=self.blur_radius))

        # Composite the blurred region with the original using the mask
        result = Image.composite(blurred_img, img, mask)
        return result

with_default_models(width_scale=2.0, height_scale=4.0, blur_radius=1, line_width=3, fill_color='black') classmethod

Load default models and create an instance of AddGlassesAugmentation.

Source code in src/training/augmentation/glasses.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@classmethod
def with_default_models(
    cls,
    width_scale=2.0,
    height_scale=4.0,
    blur_radius=1,
    line_width=3,
    fill_color="black",
) -> Self:
    """Load default models and create an instance of AddGlassesAugmentation.

    The dlib face detector and shape predictor come from the module's
    helper functions; all drawing parameters are forwarded unchanged to
    the constructor.
    """
    frontal_face_detector = get_dlib_frontal_face_detector()
    shape_predictor = get_dlib_shape_predictor()

    return cls(
        frontal_face_detector=frontal_face_detector,
        shape_predictor=shape_predictor,
        width_scale=width_scale,
        height_scale=height_scale,
        blur_radius=blur_radius,
        line_width=line_width,
        fill_color=fill_color,
    )

AddMaskAugmentation

Image transformation, generates a face mask based on landmark points

Based on https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

Source code in src/training/augmentation/mask.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class AddMaskAugmentation:
    """Draws an opaque face-mask polygon on a PIL image using dlib landmarks.

    Based on https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d
    """

    def __init__(self, frontal_face_detector, shape_predictor, color="black"):
        """Store the injected dlib models and the mask fill color."""
        self.frontal_face_detector = frontal_face_detector
        self.shape_predictor = shape_predictor
        self.color = color

    @classmethod
    def with_default_models(cls, color="black"):
        """Build an instance using the module's default dlib models."""
        return cls(
            frontal_face_detector=get_dlib_frontal_face_detector(),
            shape_predictor=get_dlib_shape_predictor(),
            color=color,
        )

    def __call__(self, img: Image.Image) -> Image.Image:
        """Draw the mask polygon on the image and return it."""
        pixels = np.array(img)
        try:
            # This seems to fail if face is already centered
            detected = self.frontal_face_detector(pixels)[0]
        except IndexError:
            print(
                f"{self.__class__.__name__}: No face detected, using entire image to detect landmarks."
            )
            # Proceed as if the face is centered
            detected = dlib.rectangle(0, 0, pixels.shape[1], pixels.shape[0])  # type: ignore

        landmarks = self.shape_predictor(pixels, detected)

        # Jaw-line points 1..15 plus nose-bridge point 29 outline the mask area.
        mask_outline = [
            (landmarks.part(i).x, landmarks.part(i).y) for i in [*range(1, 16), 29]
        ]
        ImageDraw.Draw(img).polygon(mask_outline, outline=self.color, fill=self.color)

        return img

with_default_models(color='black') classmethod

Load default models and create an instance of AddMaskAugmentation.

Source code in src/training/augmentation/mask.py
19
20
21
22
23
24
25
26
27
28
29
@classmethod
def with_default_models(cls, color="black"):
    """Load default models and create an instance of AddMaskAugmentation."""
    frontal_face_detector = get_dlib_frontal_face_detector()
    shape_predictor = get_dlib_shape_predictor()

    return cls(
        frontal_face_detector=frontal_face_detector,
        shape_predictor=shape_predictor,
        color=color,
    )

AddRandomBlackRectangle

Bases: BaseAddRandomRectangle

Add a random black rectangle to an image.

Source code in src/training/augmentation/random_rectangle.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
class AddRandomBlackRectangle(BaseAddRandomRectangle):
    """Add a random black rectangle to an image."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Return the constant color black.

        Args:
            img: The image being modified (unused).
            rect: The rectangle coordinates (unused).

        Returns:
            The RGB color black (0, 0, 0).
        """
        black = (0, 0, 0)
        return black

get_color(img, rect)

Get the color black for the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The RGB color black (0, 0, 0).

Source code in src/training/augmentation/random_rectangle.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get the color black for the rectangle.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        The RGB color black (0, 0, 0).
    """
    return 0, 0, 0

AddRandomRectangleAverageColor

Bases: BaseAddRandomRectangle

Add a random rectangle with the average color of the occluded region.

Source code in src/training/augmentation/random_rectangle.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class AddRandomRectangleAverageColor(BaseAddRandomRectangle):
    """Add a random rectangle with the average color of the occluded region."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Get the average color of the rectangle region in the image.

        Args:
            img: The image being modified.
            rect: The rectangle coordinates.

        Returns:
            The average RGB color of the rectangle region, or black
            (0, 0, 0) when the rectangle has zero area.
        """
        x1, y1, x2, y2 = rect
        pixels = list(img.crop((x1, y1, x2, y2)).getdata())
        if not pixels:
            # A degenerate (zero-area) rectangle has no pixels to average;
            # the previous implementation raised ZeroDivisionError here.
            return 0, 0, 0
        # Single pass over the pixels instead of three generator sweeps.
        r_sum = g_sum = b_sum = 0
        for r, g, b, *_ in pixels:  # *_ tolerates an alpha channel
            r_sum += r
            g_sum += g
            b_sum += b
        n = len(pixels)
        return r_sum // n, g_sum // n, b_sum // n

get_color(img, rect)

Get the average color of the rectangle region in the image.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The average RGB color of the rectangle region.

Source code in src/training/augmentation/random_rectangle.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get the average color of the rectangle region in the image.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        The average RGB color of the rectangle region.
    """
    x1, y1, x2, y2 = rect
    cropped_area = img.crop((x1, y1, x2, y2))
    pixels = list(cropped_area.getdata())
    r = sum(p[0] for p in pixels) // len(pixels)
    g = sum(p[1] for p in pixels) // len(pixels)
    b = sum(p[2] for p in pixels) // len(pixels)
    return r, g, b

AddRandomRectangleRandomColor

Bases: BaseAddRandomRectangle

Add a random rectangle with a random color to an image.

Source code in src/training/augmentation/random_rectangle.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class AddRandomRectangleRandomColor(BaseAddRandomRectangle):
    """Add a random rectangle with a random color to an image."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Get a random color for the rectangle.

        Args:
            img: The image being modified (unused).
            rect: The rectangle coordinates (unused).

        Returns:
            A uniformly random RGB color.
        """
        # Draw each channel directly; the previous tuple()-then-unpack
        # round-trip added nothing. Call order (r, g, b) is preserved so
        # seeded runs produce identical colors.
        return random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)

get_color(img, rect)

Get a random color for the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

A random RGB color.

Source code in src/training/augmentation/random_rectangle.py
75
76
77
78
79
80
81
82
83
84
85
86
87
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get a random color for the rectangle.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        A random RGB color.
    """
    r, g, b = tuple(random.randint(0, 255) for _ in range(3))
    return r, g, b

GaussianNoisePIL

Adapter for applying Gaussian noise to PIL images.

Source code in src/training/augmentation/gaussian_noise_pil.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class GaussianNoisePIL:
    """Adapter for applying Gaussian noise to PIL images."""

    def __init__(self, mean=0.0, sigma=0.1):
        """Remember the noise distribution parameters."""
        self.mean = mean
        self.sigma = sigma

    def __call__(self, img: Image.Image) -> Image.Image:
        """Apply Gaussian noise to the input PIL image."""
        as_tensor = self.pil_to_tensor(img)
        noisy = gaussian_noise(as_tensor, mean=self.mean, sigma=self.sigma)
        return to_pil_image(noisy)

    @staticmethod
    def pil_to_tensor(img: Image.Image):
        """Convert a PIL image to a scaled float32 tensor."""
        return to_dtype(to_image(img), dtype=torch.float32, scale=True)

__call__(img)

Apply Gaussian noise to the input PIL image.

Source code in src/training/augmentation/gaussian_noise_pil.py
18
19
20
21
22
def __call__(self, img: Image.Image) -> Image.Image:
    """Apply Gaussian noise to the input PIL image.

    The image is converted to a float tensor, passed through the
    ``gaussian_noise`` helper with this instance's mean/sigma, and
    converted back to a PIL image.
    """
    img_tensor = self.pil_to_tensor(img)
    noisy_tensor = gaussian_noise(img_tensor, mean=self.mean, sigma=self.sigma)
    return to_pil_image(noisy_tensor)

gaussian_noise_pil

GaussianNoisePIL

Adapter for applying Gaussian noise to PIL images.

Source code in src/training/augmentation/gaussian_noise_pil.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class GaussianNoisePIL:
    """Adapter for applying Gaussian noise to PIL images."""

    def __init__(self, mean=0.0, sigma=0.1):
        """Store the noise distribution parameters (mean and standard deviation)."""
        self.mean = mean
        self.sigma = sigma

    def __call__(self, img: Image.Image) -> Image.Image:
        """Apply Gaussian noise to the input PIL image."""
        # Convert to a float tensor, add the noise, then convert back to PIL.
        img_tensor = self.pil_to_tensor(img)
        noisy_tensor = gaussian_noise(img_tensor, mean=self.mean, sigma=self.sigma)
        return to_pil_image(noisy_tensor)

    @staticmethod
    def pil_to_tensor(img: Image.Image):
        """Convert a PIL image to a scaled float32 tensor."""
        return to_dtype(to_image(img), dtype=torch.float32, scale=True)
__call__(img)

Apply Gaussian noise to the input PIL image.

Source code in src/training/augmentation/gaussian_noise_pil.py
18
19
20
21
22
def __call__(self, img: Image.Image) -> Image.Image:
    """Apply Gaussian noise to the input PIL image.

    The image is converted to a float tensor, passed through the
    ``gaussian_noise`` helper with this instance's mean/sigma, and
    converted back to a PIL image.
    """
    img_tensor = self.pil_to_tensor(img)
    noisy_tensor = gaussian_noise(img_tensor, mean=self.mean, sigma=self.sigma)
    return to_pil_image(noisy_tensor)

glasses

AddGlassesAugmentation

Draws glasses on a PIL image, based on face landmarks.

Reference: https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

Uses face detector and shape predictor (model for extracting face landmarks) from dlib. The glasses are composed of 3 line segments and 2 ellipses, inside the ellipses a Gaussian blur is applied.

Example usage:

from src.training.augmentation import AddGlassesAugmentation
from PIL import Image

img = Image.open("image.jpg")
add_glasses = AddGlassesAugmentation.with_default_models()
img_with_glasses = add_glasses(img)

Source code in src/training/augmentation/glasses.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class AddGlassesAugmentation:
    """Draws glasses on a PIL image, based on face landmarks.

    Reference: https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

    Uses face detector and shape predictor (model for extracting face landmarks) from dlib.
    The glasses are composed of 3 line segments and 2 ellipses, inside the ellipses a Gaussian blur is applied.

    Example usage:
    >>> from src.training.augmentation import AddGlassesAugmentation
    >>> from PIL import Image
    >>> img = Image.open("image.jpg")
    >>> add_glasses = AddGlassesAugmentation.with_default_models()
    >>> img_with_glasses = add_glasses(img)
    """

    # dlib 68-point landmark indices used below: 36/39 and 42/45 are the
    # outer/inner corners of the left/right eyes, 37/41 and 43/47 the
    # upper/lower lid points, and 0/16 the outermost jaw-contour points.

    def __init__(
        self,
        frontal_face_detector,
        shape_predictor,
        width_scale=2.0,
        height_scale=4.0,
        blur_radius=1,
        line_width=3,
        fill_color="black",
    ):
        """Store the injected dlib models and the drawing parameters."""
        self.frontal_face_detector = frontal_face_detector
        self.shape_predictor = shape_predictor
        self.width_scale = width_scale
        self.height_scale = height_scale
        self.blur_radius = blur_radius
        self.line_width = line_width
        self.fill_color = fill_color

    @classmethod
    def with_default_models(
        cls,
        width_scale=2.0,
        height_scale=4.0,
        blur_radius=1,
        line_width=3,
        fill_color="black",
    ) -> Self:
        """Load default models and create an instance of AddGlassesAugmentation."""
        frontal_face_detector = get_dlib_frontal_face_detector()
        shape_predictor = get_dlib_shape_predictor()

        return cls(
            frontal_face_detector=frontal_face_detector,
            shape_predictor=shape_predictor,
            width_scale=width_scale,
            height_scale=height_scale,
            blur_radius=blur_radius,
            line_width=line_width,
            fill_color=fill_color,
        )

    def __call__(self, img: Image.Image) -> Image.Image:
        """Draw glasses over the detected eyes and return the resulting image."""
        img_numpy = np.array(img)

        try:
            # This seems to fail if face is already centered
            face = self.frontal_face_detector(img_numpy)[0]
        except IndexError:
            print(
                f"{self.__class__.__name__}: No face detected, using entire image to detect landmarks."
            )
            # Proceed as if the face is centered
            face = dlib.rectangle(0, 0, img_numpy.shape[1], img_numpy.shape[0])  # type: ignore

        landmarks = self.shape_predictor(img_numpy, face)

        left_eye_bbox = self._left_glass_bounding_box(landmarks)
        right_eye_bbox = self._right_glass_bounding_box(landmarks)

        # Blur the lens interiors first so the frame is drawn on top, sharp.
        img = self._apply_blur_to_ellipse(img, left_eye_bbox)
        img = self._apply_blur_to_ellipse(img, right_eye_bbox)

        # Frame: two lens outlines, a bridge, and two temple arms.
        draw = ImageDraw.Draw(img)
        draw.ellipse(left_eye_bbox, outline=self.fill_color, width=self.line_width)
        draw.ellipse(right_eye_bbox, outline=self.fill_color, width=self.line_width)
        draw.line(
            self._middle_part(landmarks), fill=self.fill_color, width=self.line_width
        )
        draw.line(
            self._left_part(landmarks), fill=self.fill_color, width=self.line_width
        )
        draw.line(
            self._right_part(landmarks), fill=self.fill_color, width=self.line_width
        )

        return img

    def _left_glass_bounding_box(self, landmarks):
        """Lens bounding box for the left eye: the eye-corner span scaled by
        width_scale/height_scale, centered between the eye corners."""
        left_eye_width = (
            abs(landmarks.part(39).x - landmarks.part(36).x) * self.width_scale
        )
        left_eye_height = (
            abs(landmarks.part(37).y - landmarks.part(41).y) * self.height_scale
        )
        left_eye_center_x = (landmarks.part(36).x + landmarks.part(39).x) / 2
        left_eye_center_y = (landmarks.part(36).y + landmarks.part(39).y) / 2

        top_left = (
            left_eye_center_x - left_eye_width / 2,
            left_eye_center_y - left_eye_height / 2,
        )
        bottom_right = (
            left_eye_center_x + left_eye_width / 2,
            left_eye_center_y + left_eye_height / 2,
        )
        return top_left, bottom_right

    def _right_glass_bounding_box(self, landmarks):
        """Lens bounding box for the right eye; mirrors the left-eye logic."""
        right_eye_width = (
            abs(landmarks.part(45).x - landmarks.part(42).x) * self.width_scale
        )
        right_eye_height = (
            abs(landmarks.part(43).y - landmarks.part(47).y) * self.height_scale
        )
        right_eye_center_x = (landmarks.part(42).x + landmarks.part(45).x) / 2
        right_eye_center_y = (landmarks.part(42).y + landmarks.part(45).y) / 2

        top_left = (
            right_eye_center_x - right_eye_width / 2,
            right_eye_center_y - right_eye_height / 2,
        )
        bottom_right = (
            right_eye_center_x + right_eye_width / 2,
            right_eye_center_y + right_eye_height / 2,
        )
        return top_left, bottom_right

    def _middle_part(self, landmarks):
        """Bridge segment connecting the inner edges of the two lenses,
        at each lens's vertical midpoint."""
        left_bbox = self._left_glass_bounding_box(landmarks)
        right_bbox = self._right_glass_bounding_box(landmarks)
        left = (
            left_bbox[1][0],
            (left_bbox[0][1] + left_bbox[1][1]) / 2,
        )
        right = (
            right_bbox[0][0],
            (right_bbox[0][1] + right_bbox[1][1]) / 2,
        )
        return left, right

    def _left_part(self, landmarks):
        """Temple arm from jaw-contour point 0 to the left lens edge."""
        left_bbox = self._left_glass_bounding_box(landmarks)
        left = (landmarks.part(0).x, landmarks.part(0).y)
        right = (left_bbox[0][0], (left_bbox[0][1] + left_bbox[1][1]) / 2)
        return left, right

    def _right_part(self, landmarks):
        """Temple arm from the right lens edge to jaw-contour point 16."""
        right_bbox = self._right_glass_bounding_box(landmarks)
        left = (right_bbox[1][0], (right_bbox[0][1] + right_bbox[1][1]) / 2)
        right = (landmarks.part(16).x, landmarks.part(16).y)
        return left, right

    def _apply_blur_to_ellipse(self, img, ellipse_bbox):
        """Apply Gaussian blur inside an ellipse region"""
        mask = Image.new("L", img.size, 0)  # Black mask
        mask_draw = ImageDraw.Draw(mask)
        mask_draw.ellipse(ellipse_bbox, fill=255)  # White ellipse

        blurred_img = img.filter(ImageFilter.GaussianBlur(radius=self.blur_radius))

        # Composite the blurred region with the original using the mask
        result = Image.composite(blurred_img, img, mask)
        return result
with_default_models(width_scale=2.0, height_scale=4.0, blur_radius=1, line_width=3, fill_color='black') classmethod

Load default models and create an instance of AddGlassesAugmentation.

Source code in src/training/augmentation/glasses.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@classmethod
def with_default_models(
    cls,
    width_scale=2.0,
    height_scale=4.0,
    blur_radius=1,
    line_width=3,
    fill_color="black",
) -> Self:
    """Load default models and create an instance of AddGlassesAugmentation.

    The dlib face detector and shape predictor come from the module's
    helper functions; all drawing parameters are forwarded unchanged to
    the constructor.
    """
    frontal_face_detector = get_dlib_frontal_face_detector()
    shape_predictor = get_dlib_shape_predictor()

    return cls(
        frontal_face_detector=frontal_face_detector,
        shape_predictor=shape_predictor,
        width_scale=width_scale,
        height_scale=height_scale,
        blur_radius=blur_radius,
        line_width=line_width,
        fill_color=fill_color,
    )

mask

AddMaskAugmentation

Image transformation, generates a face mask based on landmark points

Based on https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

Source code in src/training/augmentation/mask.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class AddMaskAugmentation:
    """Image transformation, generates a face mask based on landmark points

    Based on https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d
    """

    def __init__(self, frontal_face_detector, shape_predictor, color="black"):
        """Store the injected dlib models and the mask fill color."""
        self.frontal_face_detector = frontal_face_detector
        self.shape_predictor = shape_predictor
        self.color = color

    @classmethod
    def with_default_models(cls, color="black"):
        """Load default models and create an instance of AddMaskAugmentation."""
        frontal_face_detector = get_dlib_frontal_face_detector()
        shape_predictor = get_dlib_shape_predictor()

        return cls(
            frontal_face_detector=frontal_face_detector,
            shape_predictor=shape_predictor,
            color=color,
        )

    def __call__(self, img: Image.Image) -> Image.Image:
        """Draw the opaque mask polygon onto the image and return it."""
        img_numpy = np.array(img)
        try:
            # This seems to fail if face is already centered
            face = self.frontal_face_detector(img_numpy)[0]
        except IndexError:
            print(
                f"{self.__class__.__name__}: No face detected, using entire image to detect landmarks."
            )
            # Proceed as if the face is centered
            face = dlib.rectangle(0, 0, img_numpy.shape[1], img_numpy.shape[0])  # type: ignore

        landmarks = self.shape_predictor(img_numpy, face)

        # Jaw-line landmarks 1..15 plus nose-bridge point 29 outline the mask.
        indices = list(range(1, 16)) + [29]
        points = [(landmarks.part(i).x, landmarks.part(i).y) for i in indices]
        draw = ImageDraw.Draw(img)
        draw.polygon(points, outline=self.color, fill=self.color)

        return img
with_default_models(color='black') classmethod

Load default models and create an instance of AddMaskAugmentation.

Source code in src/training/augmentation/mask.py
19
20
21
22
23
24
25
26
27
28
29
@classmethod
def with_default_models(cls, color="black"):
    """Load default models and create an instance of AddMaskAugmentation."""
    frontal_face_detector = get_dlib_frontal_face_detector()
    shape_predictor = get_dlib_shape_predictor()

    return cls(
        frontal_face_detector=frontal_face_detector,
        shape_predictor=shape_predictor,
        color=color,
    )

random_rectangle

Module for adding random rectangles to images with various color strategies.

This module provides an abstract base class and concrete implementations for adding random rectangles to images. The rectangles can have random colors, a fixed black color, or the average color of the occluded region.

AddRandomBlackRectangle

Bases: BaseAddRandomRectangle

Add a random black rectangle to an image.

Source code in src/training/augmentation/random_rectangle.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
class AddRandomBlackRectangle(BaseAddRandomRectangle):
    """Add a random black rectangle to an image."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Get the color black for the rectangle.

        Args:
            img: The image being modified (unused).
            rect: The rectangle coordinates (unused).

        Returns:
            The RGB color black (0, 0, 0).
        """
        return 0, 0, 0
get_color(img, rect)

Get the color black for the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The RGB color black (0, 0, 0).

Source code in src/training/augmentation/random_rectangle.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get the color black for the rectangle.

    Args:
        img: The image being modified (unused).
        rect: The rectangle coordinates (unused).

    Returns:
        The RGB color black (0, 0, 0).
    """
    return 0, 0, 0

AddRandomRectangleAverageColor

Bases: BaseAddRandomRectangle

Add a random rectangle with the average color of the occluded region.

Source code in src/training/augmentation/random_rectangle.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class AddRandomRectangleAverageColor(BaseAddRandomRectangle):
    """Occlude a random region with a rectangle filled by that region's mean color."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Compute the mean RGB color of the region covered by the rectangle.

        Args:
            img: The image being modified.
            rect: The rectangle coordinates.

        Returns:
            The per-channel integer mean of the pixels inside the rectangle.
        """
        left, top, right, bottom = rect
        region_pixels = list(img.crop((left, top, right, bottom)).getdata())
        count = len(region_pixels)
        # Average each channel independently; any channel past the first
        # three (e.g. alpha) is ignored, matching an RGB fill color.
        channels = list(zip(*region_pixels))[:3]
        r, g, b = (sum(channel) // count for channel in channels)
        return (r, g, b)
get_color(img, rect)

Get the average color of the rectangle region in the image.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The average RGB color of the rectangle region.

Source code in src/training/augmentation/random_rectangle.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get the average color of the rectangle region in the image.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        The average RGB color of the rectangle region.
    """
    x1, y1, x2, y2 = rect
    cropped_area = img.crop((x1, y1, x2, y2))
    pixels = list(cropped_area.getdata())
    r = sum(p[0] for p in pixels) // len(pixels)
    g = sum(p[1] for p in pixels) // len(pixels)
    b = sum(p[2] for p in pixels) // len(pixels)
    return r, g, b

AddRandomRectangleRandomColor

Bases: BaseAddRandomRectangle

Add a random rectangle with a random color to an image.

Source code in src/training/augmentation/random_rectangle.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class AddRandomRectangleRandomColor(BaseAddRandomRectangle):
    """Occlude a random region with a rectangle filled by a uniformly random color."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Draw a uniformly random RGB color.

        Args:
            img: The image being modified (unused).
            rect: The rectangle coordinates (unused).

        Returns:
            A random RGB triple, each channel in [0, 255].
        """
        # Three sequential randint draws, one per channel (R, G, B).
        return (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        )
get_color(img, rect)

Get a random color for the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

A random RGB color.

Source code in src/training/augmentation/random_rectangle.py
75
76
77
78
79
80
81
82
83
84
85
86
87
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get a random color for the rectangle.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        A random RGB color.
    """
    r, g, b = tuple(random.randint(0, 255) for _ in range(3))
    return r, g, b

BaseAddRandomRectangle

Bases: ABC

Base class for adding a random rectangle to an image.

Attributes:

Name Type Description
min_size

Minimum size of the rectangle.

max_size

Maximum size of the rectangle.

Source code in src/training/augmentation/random_rectangle.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class BaseAddRandomRectangle(ABC):
    """Base class for adding a random rectangle to an image.

    Subclasses choose the fill color by implementing ``get_color``.

    Attributes:
        min_size: Minimum size (in pixels) of the rectangle's sides.
        max_size: Maximum size (in pixels) of the rectangle's sides.
    """

    def __init__(self, min_size=10, max_size=50):
        """Initialize the base class with rectangle size constraints.

        Args:
            min_size: Minimum size of the rectangle.
            max_size: Maximum size of the rectangle.
        """
        self.min_size = min_size
        self.max_size = max_size

    def __call__(self, img):
        """Add a random rectangle to the given image (modified in place).

        Args:
            img: The image to modify.

        Returns:
            The modified image with a random rectangle.
        """
        draw = ImageDraw.Draw(img)
        width, height = img.size

        # Clamp the sampled side lengths to the image dimensions so that
        # random.randint never receives an empty range on images smaller
        # than the configured size bounds.
        max_w = min(self.max_size, width)
        max_h = min(self.max_size, height)
        rect_width = random.randint(min(self.min_size, max_w), max_w)
        rect_height = random.randint(min(self.min_size, max_h), max_h)
        x1 = random.randint(0, width - rect_width)
        y1 = random.randint(0, height - rect_height)
        rect = [x1, y1, x1 + rect_width, y1 + rect_height]

        color = self.get_color(img, rect)

        draw.rectangle(rect, fill=color)
        return img

    @abstractmethod
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Determine the color of the rectangle.

        Args:
            img: The image being modified.
            rect: The rectangle coordinates [x1, y1, x2, y2].

        Returns:
            The color of the rectangle in RGB format.
        """
__call__(img)

Add a random rectangle to the given image.

Parameters:

Name Type Description Default
img

The image to modify.

required

Returns:

Type Description

The modified image with a random rectangle.

Source code in src/training/augmentation/random_rectangle.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def __call__(self, img):
    """Add a random rectangle to the given image.

    Args:
        img: The image to modify.

    Returns:
        The modified image with a random rectangle.
    """
    pen = ImageDraw.Draw(img)
    img_w, img_h = img.size

    # Sample the rectangle's size first, then a position that keeps it
    # fully inside the image bounds.
    w = random.randint(self.min_size, self.max_size)
    h = random.randint(self.min_size, self.max_size)
    left = random.randint(0, img_w - w)
    top = random.randint(0, img_h - h)
    rect = [left, top, left + w, top + h]

    pen.rectangle(rect, fill=self.get_color(img, rect))
    return img
__init__(min_size=10, max_size=50)

Initialize the base class with rectangle size constraints.

Parameters:

Name Type Description Default
min_size

Minimum size of the rectangle.

10
max_size

Maximum size of the rectangle.

50
Source code in src/training/augmentation/random_rectangle.py
23
24
25
26
27
28
29
30
31
def __init__(self, min_size=10, max_size=50):
    """Initialize the base class with rectangle size constraints.

    Args:
        min_size: Minimum size of the rectangle.
        max_size: Maximum size of the rectangle.
    """
    # Inclusive bounds used when sampling rectangle side lengths.
    self.min_size, self.max_size = min_size, max_size
get_color(img, rect) abstractmethod

Abstract method to determine the color of the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The color of the rectangle in RGB format.

Source code in src/training/augmentation/random_rectangle.py
58
59
60
61
62
63
64
65
66
67
68
69
@abstractmethod
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Determine the color of the rectangle.

    Concrete subclasses must override this hook.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        The color of the rectangle in RGB format.
    """
    ...

fine_tuning

count_correct(embeddings_1, embeddings_2, labels)

Count correct embedding pairs with respect to labels.

For each triple (e1, e2, label): label is 1 for the same person and -1 for different people; cosine similarity is used to determine if e1 and e2 are similar enough; similarity is in range [-1, 1], where 1 means very similar; if similarity >= 0, then e1 and e2 are considered similar (label 1); if similarity < 0, then e1 and e2 are considered different (label -1).

B - batch dimension; E - embedding dimension

Parameters:

Name Type Description Default
embeddings_1 Tensor

Embeddings of the first image in the pair, shape (B, E).

required
embeddings_2 Tensor

Embeddings of the second image in the pair, shape (B, E).

required
labels Tensor

Labels indicating if the pairs are from the same person (1) or different people (-1), shape (B,).

required
Source code in src/training/fine_tuning.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def count_correct(
    embeddings_1: Tensor,
    embeddings_2: Tensor,
    labels: Tensor,
) -> int:
    """Count correct embedding pairs with respect to labels.

    For each triple (e1, e2, label):
    - label is 1 for the same person and -1 for different people
    - cosine similarity is used to determine if e1 and e2 are similar enough
    - similarity is in range [-1, 1], where 1 means very similar
    - if similarity >= 0, then e1 and e2 are considered similar (label 1)
    - if similarity < 0, then e1 and e2 are considered different (label -1)

    B - batch dimension
    E - embedding dimension

    Args:
        embeddings_1 (Tensor): Embeddings of the first image in the pair, shape (B, E).
        embeddings_2 (Tensor): Embeddings of the second image in the pair, shape (B, E).
        labels (Tensor): Labels indicating if the pairs are from the same person (1) or different people (-1), shape (B,).

    Returns:
        The number of pairs whose predicted label matches the ground truth.
    """
    similarities = F.cosine_similarity(embeddings_1, embeddings_2)
    # Map {False, True} -> {0.0, 1.0} -> {-1.0, 1.0} to match the label encoding.
    predicted_labels = (similarities >= 0).float() * 2 - 1
    # .sum() of a boolean tensor yields an integer count, not a float,
    # so the return type is int (the previous annotation said float).
    return int((predicted_labels == labels).sum().item())

loss

ContrastiveLoss

Bases: Module

Source code in src/training/loss/contrastive_loss.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
class ContrastiveLoss(nn.Module):
    """Contrastive loss over embedding pairs.

    Pulls same-person embeddings together (penalizing their distance) and
    pushes different-person embeddings apart, up to the configured margin.
    """

    def __init__(self, margin=1.0):
        """Initialize the loss.

        Args:
            margin: Distance beyond which different-person pairs contribute zero loss.
        """
        super().__init__()  # modern zero-argument form
        self.margin = margin

    def forward(self, output1, output2, label):
        """Label is 0 for the same person, 1 for different person"""
        euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
        # Same-person pairs (label 0): squared distance.
        # Different-person pairs (label 1): squared shortfall from the margin (hinge).
        loss = torch.mean(
            (1 - label) * torch.pow(euclidean_distance, 2)
            + label
            * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)
        )
        return loss

forward(output1, output2, label)

Label is 0 for the same person, 1 for different person

Source code in src/training/loss/contrastive_loss.py
10
11
12
13
14
15
16
17
18
def forward(self, output1, output2, label):
    """Label is 0 for the same person, 1 for different person"""
    euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
    loss = torch.mean(
        (1 - label) * torch.pow(euclidean_distance, 2)
        + label
        * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)
    )
    return loss

contrastive_loss

ContrastiveLoss

Bases: Module

Source code in src/training/loss/contrastive_loss.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
class ContrastiveLoss(nn.Module):
    """Contrastive loss over embedding pairs.

    Pulls same-person embeddings together (penalizing their distance) and
    pushes different-person embeddings apart, up to the configured margin.
    """

    def __init__(self, margin=1.0):
        """Initialize the loss.

        Args:
            margin: Distance beyond which different-person pairs contribute zero loss.
        """
        super().__init__()  # modern zero-argument form
        self.margin = margin

    def forward(self, output1, output2, label):
        """Label is 0 for the same person, 1 for different person"""
        euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
        # Same-person pairs (label 0): squared distance.
        # Different-person pairs (label 1): squared shortfall from the margin (hinge).
        loss = torch.mean(
            (1 - label) * torch.pow(euclidean_distance, 2)
            + label
            * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)
        )
        return loss
forward(output1, output2, label)

Label is 0 for the same person, 1 for different person

Source code in src/training/loss/contrastive_loss.py
10
11
12
13
14
15
16
17
18
def forward(self, output1, output2, label):
    """Label is 0 for the same person, 1 for different person"""
    euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
    loss = torch.mean(
        (1 - label) * torch.pow(euclidean_distance, 2)
        + label
        * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)
    )
    return loss