Skip to content

Training

augmentation

AddGlassesAugmentation

Draws glasses on a PIL image, based on face landmarks.

Reference: https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

Uses face detector and shape predictor (model for extracting face landmarks) from dlib. The glasses are composed of 3 line segments and 2 ellipses, inside the ellipses a Gaussian blur is applied.

Example usage:

from src.training.augmentation import AddGlassesAugmentation
from PIL import Image

img = Image.open("image.jpg")
add_glasses = AddGlassesAugmentation.with_default_models()
img_with_glasses = add_glasses(img)

Source code in src/training/augmentation/glasses.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class AddGlassesAugmentation:
    """Draws glasses on a PIL image, based on face landmarks.

    Reference: https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

    Uses face detector and shape predictor (model for extracting face landmarks) from dlib.
    The glasses are composed of 3 line segments and 2 ellipses, inside the ellipses a Gaussian blur is applied.

    Example usage:
    >>> from src.training.augmentation import AddGlassesAugmentation
    >>> from PIL import Image
    >>> img = Image.open("image.jpg")
    >>> add_glasses = AddGlassesAugmentation.with_default_models()
    >>> img_with_glasses = add_glasses(img)
    """

    def __init__(
        self,
        frontal_face_detector,
        shape_predictor,
        width_scale=2.0,
        height_scale=4.0,
        blur_radius=1,
        line_width=3,
        fill_color="black",
    ):
        self.frontal_face_detector = frontal_face_detector
        self.shape_predictor = shape_predictor
        self.width_scale = width_scale
        self.height_scale = height_scale
        self.blur_radius = blur_radius
        self.line_width = line_width
        self.fill_color = fill_color

    @classmethod
    def with_default_models(
        cls,
        width_scale=2.0,
        height_scale=4.0,
        blur_radius=1,
        line_width=3,
        fill_color="black",
    ) -> Self:
        """Load default models and create an instance of AddGlassesAugmentation."""
        frontal_face_detector = get_dlib_frontal_face_detector()
        shape_predictor = get_dlib_shape_predictor()

        return cls(
            frontal_face_detector=frontal_face_detector,
            shape_predictor=shape_predictor,
            width_scale=width_scale,
            height_scale=height_scale,
            blur_radius=blur_radius,
            line_width=line_width,
            fill_color=fill_color,
        )

    def __call__(self, img: Image.Image) -> Image.Image:
        img_numpy = np.array(img)

        try:
            # This seems to fail if face is already centered
            face = self.frontal_face_detector(img_numpy)[0]
        except IndexError:
            print(
                f"{self.__class__.__name__}: No face detected, using entire image to detect landmarks."
            )
            # Proceed as if the face is centered
            face = dlib.rectangle(0, 0, img_numpy.shape[1], img_numpy.shape[0])  # type: ignore

        landmarks = self.shape_predictor(img_numpy, face)

        left_eye_bbox = self._left_glass_bounding_box(landmarks)
        right_eye_bbox = self._right_glass_bounding_box(landmarks)

        img = self._apply_blur_to_ellipse(img, left_eye_bbox)
        img = self._apply_blur_to_ellipse(img, right_eye_bbox)

        draw = ImageDraw.Draw(img)
        draw.ellipse(left_eye_bbox, outline=self.fill_color, width=self.line_width)
        draw.ellipse(right_eye_bbox, outline=self.fill_color, width=self.line_width)
        draw.line(
            self._middle_part(landmarks), fill=self.fill_color, width=self.line_width
        )
        draw.line(
            self._left_part(landmarks), fill=self.fill_color, width=self.line_width
        )
        draw.line(
            self._right_part(landmarks), fill=self.fill_color, width=self.line_width
        )

        return img

    def _left_glass_bounding_box(self, landmarks):
        left_eye_width = (
            abs(landmarks.part(39).x - landmarks.part(36).x) * self.width_scale
        )
        left_eye_height = (
            abs(landmarks.part(37).y - landmarks.part(41).y) * self.height_scale
        )
        left_eye_center_x = (landmarks.part(36).x + landmarks.part(39).x) / 2
        left_eye_center_y = (landmarks.part(36).y + landmarks.part(39).y) / 2

        top_left = (
            left_eye_center_x - left_eye_width / 2,
            left_eye_center_y - left_eye_height / 2,
        )
        bottom_right = (
            left_eye_center_x + left_eye_width / 2,
            left_eye_center_y + left_eye_height / 2,
        )
        return top_left, bottom_right

    def _right_glass_bounding_box(self, landmarks):
        right_eye_width = (
            abs(landmarks.part(45).x - landmarks.part(42).x) * self.width_scale
        )
        right_eye_height = (
            abs(landmarks.part(43).y - landmarks.part(47).y) * self.height_scale
        )
        right_eye_center_x = (landmarks.part(42).x + landmarks.part(45).x) / 2
        right_eye_center_y = (landmarks.part(42).y + landmarks.part(45).y) / 2

        top_left = (
            right_eye_center_x - right_eye_width / 2,
            right_eye_center_y - right_eye_height / 2,
        )
        bottom_right = (
            right_eye_center_x + right_eye_width / 2,
            right_eye_center_y + right_eye_height / 2,
        )
        return top_left, bottom_right

    def _middle_part(self, landmarks):
        left_bbox = self._left_glass_bounding_box(landmarks)
        right_bbox = self._right_glass_bounding_box(landmarks)
        left = (
            left_bbox[1][0],
            (left_bbox[0][1] + left_bbox[1][1]) / 2,
        )
        right = (
            right_bbox[0][0],
            (right_bbox[0][1] + right_bbox[1][1]) / 2,
        )
        return left, right

    def _left_part(self, landmarks):
        left_bbox = self._left_glass_bounding_box(landmarks)
        left = (landmarks.part(0).x, landmarks.part(0).y)
        right = (left_bbox[0][0], (left_bbox[0][1] + left_bbox[1][1]) / 2)
        return left, right

    def _right_part(self, landmarks):
        right_bbox = self._right_glass_bounding_box(landmarks)
        left = (right_bbox[1][0], (right_bbox[0][1] + right_bbox[1][1]) / 2)
        right = (landmarks.part(16).x, landmarks.part(16).y)
        return left, right

    def _apply_blur_to_ellipse(self, img, ellipse_bbox):
        """Apply Gaussian blur inside an ellipse region"""
        mask = Image.new("L", img.size, 0)  # Black mask
        mask_draw = ImageDraw.Draw(mask)
        mask_draw.ellipse(ellipse_bbox, fill=255)  # White ellipse

        blurred_img = img.filter(ImageFilter.GaussianBlur(radius=self.blur_radius))

        # Composite the blurred region with the original using the mask
        result = Image.composite(blurred_img, img, mask)
        return result

with_default_models(width_scale=2.0, height_scale=4.0, blur_radius=1, line_width=3, fill_color='black') classmethod

Load default models and create an instance of AddGlassesAugmentation.

Source code in src/training/augmentation/glasses.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@classmethod
def with_default_models(
    cls,
    width_scale=2.0,
    height_scale=4.0,
    blur_radius=1,
    line_width=3,
    fill_color="black",
) -> Self:
    """Load default models and create an instance of AddGlassesAugmentation.

    The dlib face detector and shape predictor come from the module's
    helper functions; all drawing parameters are forwarded unchanged to
    the constructor.
    """
    frontal_face_detector = get_dlib_frontal_face_detector()
    shape_predictor = get_dlib_shape_predictor()

    return cls(
        frontal_face_detector=frontal_face_detector,
        shape_predictor=shape_predictor,
        width_scale=width_scale,
        height_scale=height_scale,
        blur_radius=blur_radius,
        line_width=line_width,
        fill_color=fill_color,
    )

AddMaskAugmentation

Image transformation, generates a face mask based on landmark points

Based on https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

Source code in src/training/augmentation/mask.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class AddMaskAugmentation:
    """Draws an opaque face-mask polygon on a PIL image using dlib landmarks.

    Based on https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d
    """

    def __init__(self, frontal_face_detector, shape_predictor, color="black"):
        """Store the injected dlib models and the mask fill color."""
        self.frontal_face_detector = frontal_face_detector
        self.shape_predictor = shape_predictor
        self.color = color

    @classmethod
    def with_default_models(cls, color="black"):
        """Build an instance using the module's default dlib models."""
        return cls(
            frontal_face_detector=get_dlib_frontal_face_detector(),
            shape_predictor=get_dlib_shape_predictor(),
            color=color,
        )

    def __call__(self, img: Image.Image) -> Image.Image:
        """Draw the mask polygon on the image and return it."""
        pixels = np.array(img)
        try:
            # This seems to fail if face is already centered
            detected = self.frontal_face_detector(pixels)[0]
        except IndexError:
            print(
                f"{self.__class__.__name__}: No face detected, using entire image to detect landmarks."
            )
            # Proceed as if the face is centered
            detected = dlib.rectangle(0, 0, pixels.shape[1], pixels.shape[0])  # type: ignore

        landmarks = self.shape_predictor(pixels, detected)

        # Jaw-line points 1..15 plus nose-bridge point 29 outline the mask area.
        mask_outline = [
            (landmarks.part(i).x, landmarks.part(i).y) for i in [*range(1, 16), 29]
        ]
        ImageDraw.Draw(img).polygon(mask_outline, outline=self.color, fill=self.color)

        return img

with_default_models(color='black') classmethod

Load default models and create an instance of AddMaskAugmentation.

Source code in src/training/augmentation/mask.py
19
20
21
22
23
24
25
26
27
28
29
@classmethod
def with_default_models(cls, color="black"):
    """Load default models and create an instance of AddMaskAugmentation."""
    frontal_face_detector = get_dlib_frontal_face_detector()
    shape_predictor = get_dlib_shape_predictor()

    return cls(
        frontal_face_detector=frontal_face_detector,
        shape_predictor=shape_predictor,
        color=color,
    )

AddRandomBlackRectangle

Bases: BaseAddRandomRectangle

Add a random black rectangle to an image.

Source code in src/training/augmentation/random_rectangle.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
class AddRandomBlackRectangle(BaseAddRandomRectangle):
    """Add a random black rectangle to an image."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Return the constant color black.

        Args:
            img: The image being modified (unused).
            rect: The rectangle coordinates (unused).

        Returns:
            The RGB color black (0, 0, 0).
        """
        black = (0, 0, 0)
        return black

get_color(img, rect)

Get the color black for the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The RGB color black (0, 0, 0).

Source code in src/training/augmentation/random_rectangle.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get the color black for the rectangle.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        The RGB color black (0, 0, 0).
    """
    return 0, 0, 0

AddRandomRectangleAverageColor

Bases: BaseAddRandomRectangle

Add a random rectangle with the average color of the occluded region.

Source code in src/training/augmentation/random_rectangle.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class AddRandomRectangleAverageColor(BaseAddRandomRectangle):
    """Add a random rectangle with the average color of the occluded region."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Get the average color of the rectangle region in the image.

        Args:
            img: The image being modified.
            rect: The rectangle coordinates.

        Returns:
            The average RGB color of the rectangle region, or black
            (0, 0, 0) when the rectangle has zero area.
        """
        x1, y1, x2, y2 = rect
        pixels = list(img.crop((x1, y1, x2, y2)).getdata())
        if not pixels:
            # A degenerate (zero-area) rectangle has no pixels to average;
            # the previous implementation raised ZeroDivisionError here.
            return 0, 0, 0
        # Single pass over the pixels instead of three generator sweeps.
        r_sum = g_sum = b_sum = 0
        for r, g, b, *_ in pixels:  # *_ tolerates an alpha channel
            r_sum += r
            g_sum += g
            b_sum += b
        n = len(pixels)
        return r_sum // n, g_sum // n, b_sum // n

get_color(img, rect)

Get the average color of the rectangle region in the image.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The average RGB color of the rectangle region.

Source code in src/training/augmentation/random_rectangle.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get the average color of the rectangle region in the image.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        The average RGB color of the rectangle region.
    """
    x1, y1, x2, y2 = rect
    cropped_area = img.crop((x1, y1, x2, y2))
    pixels = list(cropped_area.getdata())
    r = sum(p[0] for p in pixels) // len(pixels)
    g = sum(p[1] for p in pixels) // len(pixels)
    b = sum(p[2] for p in pixels) // len(pixels)
    return r, g, b

AddRandomRectangleRandomColor

Bases: BaseAddRandomRectangle

Add a random rectangle with a random color to an image.

Source code in src/training/augmentation/random_rectangle.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class AddRandomRectangleRandomColor(BaseAddRandomRectangle):
    """Add a random rectangle with a random color to an image."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Get a random color for the rectangle.

        Args:
            img: The image being modified (unused).
            rect: The rectangle coordinates (unused).

        Returns:
            A uniformly random RGB color.
        """
        # Draw each channel directly; the previous tuple()-then-unpack
        # round-trip added nothing. Call order (r, g, b) is preserved so
        # seeded runs produce identical colors.
        return random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)

get_color(img, rect)

Get a random color for the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

A random RGB color.

Source code in src/training/augmentation/random_rectangle.py
75
76
77
78
79
80
81
82
83
84
85
86
87
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get a random color for the rectangle.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        A random RGB color.
    """
    r, g, b = tuple(random.randint(0, 255) for _ in range(3))
    return r, g, b

GaussianNoisePIL

Adapter for applying Gaussian noise to PIL images.

Source code in src/training/augmentation/gaussian_noise_pil.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class GaussianNoisePIL:
    """Adapter for applying Gaussian noise to PIL images."""

    def __init__(self, mean=0.0, sigma=0.1):
        """Remember the noise distribution parameters."""
        self.mean = mean
        self.sigma = sigma

    def __call__(self, img: Image.Image) -> Image.Image:
        """Apply Gaussian noise to the input PIL image."""
        as_tensor = self.pil_to_tensor(img)
        noisy = gaussian_noise(as_tensor, mean=self.mean, sigma=self.sigma)
        return to_pil_image(noisy)

    @staticmethod
    def pil_to_tensor(img: Image.Image):
        """Convert a PIL image to a scaled float32 tensor."""
        return to_dtype(to_image(img), dtype=torch.float32, scale=True)

__call__(img)

Apply Gaussian noise to the input PIL image.

Source code in src/training/augmentation/gaussian_noise_pil.py
18
19
20
21
22
def __call__(self, img: Image.Image) -> Image.Image:
    """Apply Gaussian noise to the input PIL image.

    The image is converted to a float tensor, passed through the
    ``gaussian_noise`` helper with this instance's mean/sigma, and
    converted back to a PIL image.
    """
    img_tensor = self.pil_to_tensor(img)
    noisy_tensor = gaussian_noise(img_tensor, mean=self.mean, sigma=self.sigma)
    return to_pil_image(noisy_tensor)

gaussian_noise_pil

GaussianNoisePIL

Adapter for applying Gaussian noise to PIL images.

Source code in src/training/augmentation/gaussian_noise_pil.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class GaussianNoisePIL:
    """Adapter for applying Gaussian noise to PIL images."""

    def __init__(self, mean=0.0, sigma=0.1):
        """Store the noise distribution parameters (mean and standard deviation)."""
        self.mean = mean
        self.sigma = sigma

    def __call__(self, img: Image.Image) -> Image.Image:
        """Apply Gaussian noise to the input PIL image."""
        # Convert to a float tensor, add the noise, then convert back to PIL.
        img_tensor = self.pil_to_tensor(img)
        noisy_tensor = gaussian_noise(img_tensor, mean=self.mean, sigma=self.sigma)
        return to_pil_image(noisy_tensor)

    @staticmethod
    def pil_to_tensor(img: Image.Image):
        """Convert a PIL image to a scaled float32 tensor."""
        return to_dtype(to_image(img), dtype=torch.float32, scale=True)
__call__(img)

Apply Gaussian noise to the input PIL image.

Source code in src/training/augmentation/gaussian_noise_pil.py
18
19
20
21
22
def __call__(self, img: Image.Image) -> Image.Image:
    """Apply Gaussian noise to the input PIL image.

    The image is converted to a float tensor, passed through the
    ``gaussian_noise`` helper with this instance's mean/sigma, and
    converted back to a PIL image.
    """
    img_tensor = self.pil_to_tensor(img)
    noisy_tensor = gaussian_noise(img_tensor, mean=self.mean, sigma=self.sigma)
    return to_pil_image(noisy_tensor)

glasses

AddGlassesAugmentation

Draws glasses on a PIL image, based on face landmarks.

Reference: https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

Uses face detector and shape predictor (model for extracting face landmarks) from dlib. The glasses are composed of 3 line segments and 2 ellipses, inside the ellipses a Gaussian blur is applied.

Example usage:

from src.training.augmentation import AddGlassesAugmentation
from PIL import Image

img = Image.open("image.jpg")
add_glasses = AddGlassesAugmentation.with_default_models()
img_with_glasses = add_glasses(img)

Source code in src/training/augmentation/glasses.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
class AddGlassesAugmentation:
    """Draws glasses on a PIL image, based on face landmarks.

    Reference: https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

    Uses face detector and shape predictor (model for extracting face landmarks) from dlib.
    The glasses are composed of 3 line segments and 2 ellipses, inside the ellipses a Gaussian blur is applied.

    Example usage:
    >>> from src.training.augmentation import AddGlassesAugmentation
    >>> from PIL import Image
    >>> img = Image.open("image.jpg")
    >>> add_glasses = AddGlassesAugmentation.with_default_models()
    >>> img_with_glasses = add_glasses(img)
    """

    # dlib 68-point landmark indices used below: 36/39 and 42/45 are the
    # outer/inner corners of the left/right eyes, 37/41 and 43/47 the
    # upper/lower lid points, and 0/16 the outermost jaw-contour points.

    def __init__(
        self,
        frontal_face_detector,
        shape_predictor,
        width_scale=2.0,
        height_scale=4.0,
        blur_radius=1,
        line_width=3,
        fill_color="black",
    ):
        """Store the injected dlib models and the drawing parameters."""
        self.frontal_face_detector = frontal_face_detector
        self.shape_predictor = shape_predictor
        self.width_scale = width_scale
        self.height_scale = height_scale
        self.blur_radius = blur_radius
        self.line_width = line_width
        self.fill_color = fill_color

    @classmethod
    def with_default_models(
        cls,
        width_scale=2.0,
        height_scale=4.0,
        blur_radius=1,
        line_width=3,
        fill_color="black",
    ) -> Self:
        """Load default models and create an instance of AddGlassesAugmentation."""
        frontal_face_detector = get_dlib_frontal_face_detector()
        shape_predictor = get_dlib_shape_predictor()

        return cls(
            frontal_face_detector=frontal_face_detector,
            shape_predictor=shape_predictor,
            width_scale=width_scale,
            height_scale=height_scale,
            blur_radius=blur_radius,
            line_width=line_width,
            fill_color=fill_color,
        )

    def __call__(self, img: Image.Image) -> Image.Image:
        """Draw glasses over the detected eyes and return the resulting image."""
        img_numpy = np.array(img)

        try:
            # This seems to fail if face is already centered
            face = self.frontal_face_detector(img_numpy)[0]
        except IndexError:
            print(
                f"{self.__class__.__name__}: No face detected, using entire image to detect landmarks."
            )
            # Proceed as if the face is centered
            face = dlib.rectangle(0, 0, img_numpy.shape[1], img_numpy.shape[0])  # type: ignore

        landmarks = self.shape_predictor(img_numpy, face)

        left_eye_bbox = self._left_glass_bounding_box(landmarks)
        right_eye_bbox = self._right_glass_bounding_box(landmarks)

        # Blur the lens interiors first so the frame is drawn on top, sharp.
        img = self._apply_blur_to_ellipse(img, left_eye_bbox)
        img = self._apply_blur_to_ellipse(img, right_eye_bbox)

        # Frame: two lens outlines, a bridge, and two temple arms.
        draw = ImageDraw.Draw(img)
        draw.ellipse(left_eye_bbox, outline=self.fill_color, width=self.line_width)
        draw.ellipse(right_eye_bbox, outline=self.fill_color, width=self.line_width)
        draw.line(
            self._middle_part(landmarks), fill=self.fill_color, width=self.line_width
        )
        draw.line(
            self._left_part(landmarks), fill=self.fill_color, width=self.line_width
        )
        draw.line(
            self._right_part(landmarks), fill=self.fill_color, width=self.line_width
        )

        return img

    def _left_glass_bounding_box(self, landmarks):
        """Lens bounding box for the left eye: the eye-corner span scaled by
        width_scale/height_scale, centered between the eye corners."""
        left_eye_width = (
            abs(landmarks.part(39).x - landmarks.part(36).x) * self.width_scale
        )
        left_eye_height = (
            abs(landmarks.part(37).y - landmarks.part(41).y) * self.height_scale
        )
        left_eye_center_x = (landmarks.part(36).x + landmarks.part(39).x) / 2
        left_eye_center_y = (landmarks.part(36).y + landmarks.part(39).y) / 2

        top_left = (
            left_eye_center_x - left_eye_width / 2,
            left_eye_center_y - left_eye_height / 2,
        )
        bottom_right = (
            left_eye_center_x + left_eye_width / 2,
            left_eye_center_y + left_eye_height / 2,
        )
        return top_left, bottom_right

    def _right_glass_bounding_box(self, landmarks):
        """Lens bounding box for the right eye; mirrors the left-eye logic."""
        right_eye_width = (
            abs(landmarks.part(45).x - landmarks.part(42).x) * self.width_scale
        )
        right_eye_height = (
            abs(landmarks.part(43).y - landmarks.part(47).y) * self.height_scale
        )
        right_eye_center_x = (landmarks.part(42).x + landmarks.part(45).x) / 2
        right_eye_center_y = (landmarks.part(42).y + landmarks.part(45).y) / 2

        top_left = (
            right_eye_center_x - right_eye_width / 2,
            right_eye_center_y - right_eye_height / 2,
        )
        bottom_right = (
            right_eye_center_x + right_eye_width / 2,
            right_eye_center_y + right_eye_height / 2,
        )
        return top_left, bottom_right

    def _middle_part(self, landmarks):
        """Bridge segment connecting the inner edges of the two lenses,
        at each lens's vertical midpoint."""
        left_bbox = self._left_glass_bounding_box(landmarks)
        right_bbox = self._right_glass_bounding_box(landmarks)
        left = (
            left_bbox[1][0],
            (left_bbox[0][1] + left_bbox[1][1]) / 2,
        )
        right = (
            right_bbox[0][0],
            (right_bbox[0][1] + right_bbox[1][1]) / 2,
        )
        return left, right

    def _left_part(self, landmarks):
        """Temple arm from jaw-contour point 0 to the left lens edge."""
        left_bbox = self._left_glass_bounding_box(landmarks)
        left = (landmarks.part(0).x, landmarks.part(0).y)
        right = (left_bbox[0][0], (left_bbox[0][1] + left_bbox[1][1]) / 2)
        return left, right

    def _right_part(self, landmarks):
        """Temple arm from the right lens edge to jaw-contour point 16."""
        right_bbox = self._right_glass_bounding_box(landmarks)
        left = (right_bbox[1][0], (right_bbox[0][1] + right_bbox[1][1]) / 2)
        right = (landmarks.part(16).x, landmarks.part(16).y)
        return left, right

    def _apply_blur_to_ellipse(self, img, ellipse_bbox):
        """Apply Gaussian blur inside an ellipse region"""
        mask = Image.new("L", img.size, 0)  # Black mask
        mask_draw = ImageDraw.Draw(mask)
        mask_draw.ellipse(ellipse_bbox, fill=255)  # White ellipse

        blurred_img = img.filter(ImageFilter.GaussianBlur(radius=self.blur_radius))

        # Composite the blurred region with the original using the mask
        result = Image.composite(blurred_img, img, mask)
        return result
with_default_models(width_scale=2.0, height_scale=4.0, blur_radius=1, line_width=3, fill_color='black') classmethod

Load default models and create an instance of AddGlassesAugmentation.

Source code in src/training/augmentation/glasses.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@classmethod
def with_default_models(
    cls,
    width_scale=2.0,
    height_scale=4.0,
    blur_radius=1,
    line_width=3,
    fill_color="black",
) -> Self:
    """Load default models and create an instance of AddGlassesAugmentation.

    The dlib face detector and shape predictor come from the module's
    helper functions; all drawing parameters are forwarded unchanged to
    the constructor.
    """
    frontal_face_detector = get_dlib_frontal_face_detector()
    shape_predictor = get_dlib_shape_predictor()

    return cls(
        frontal_face_detector=frontal_face_detector,
        shape_predictor=shape_predictor,
        width_scale=width_scale,
        height_scale=height_scale,
        blur_radius=blur_radius,
        line_width=line_width,
        fill_color=fill_color,
    )

mask

AddMaskAugmentation

Image transformation, generates a face mask based on landmark points

Based on https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d

Source code in src/training/augmentation/mask.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
class AddMaskAugmentation:
    """Image transformation, generates a face mask based on landmark points

    Based on https://xictus77.medium.com/facial-mask-overlay-with-opencv-dlib-4d948964cc4d
    """

    def __init__(self, frontal_face_detector, shape_predictor, color="black"):
        """Store the injected dlib models and the mask fill color."""
        self.frontal_face_detector = frontal_face_detector
        self.shape_predictor = shape_predictor
        self.color = color

    @classmethod
    def with_default_models(cls, color="black"):
        """Load default models and create an instance of AddMaskAugmentation."""
        frontal_face_detector = get_dlib_frontal_face_detector()
        shape_predictor = get_dlib_shape_predictor()

        return cls(
            frontal_face_detector=frontal_face_detector,
            shape_predictor=shape_predictor,
            color=color,
        )

    def __call__(self, img: Image.Image) -> Image.Image:
        """Draw the opaque mask polygon onto the image and return it."""
        img_numpy = np.array(img)
        try:
            # This seems to fail if face is already centered
            face = self.frontal_face_detector(img_numpy)[0]
        except IndexError:
            print(
                f"{self.__class__.__name__}: No face detected, using entire image to detect landmarks."
            )
            # Proceed as if the face is centered
            face = dlib.rectangle(0, 0, img_numpy.shape[1], img_numpy.shape[0])  # type: ignore

        landmarks = self.shape_predictor(img_numpy, face)

        # Jaw-line landmarks 1..15 plus nose-bridge point 29 outline the mask.
        indices = list(range(1, 16)) + [29]
        points = [(landmarks.part(i).x, landmarks.part(i).y) for i in indices]
        draw = ImageDraw.Draw(img)
        draw.polygon(points, outline=self.color, fill=self.color)

        return img
with_default_models(color='black') classmethod

Load default models and create an instance of AddMaskAugmentation.

Source code in src/training/augmentation/mask.py
19
20
21
22
23
24
25
26
27
28
29
@classmethod
def with_default_models(cls, color="black"):
    """Load default models and create an instance of AddMaskAugmentation."""
    frontal_face_detector = get_dlib_frontal_face_detector()
    shape_predictor = get_dlib_shape_predictor()

    return cls(
        frontal_face_detector=frontal_face_detector,
        shape_predictor=shape_predictor,
        color=color,
    )

random_rectangle

Module for adding random rectangles to images with various color strategies.

This module provides an abstract base class and concrete implementations for adding random rectangles to images. The rectangles can have random colors, a fixed black color, or the average color of the occluded region.

AddRandomBlackRectangle

Bases: BaseAddRandomRectangle

Add a random black rectangle to an image.

Source code in src/training/augmentation/random_rectangle.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
class AddRandomBlackRectangle(BaseAddRandomRectangle):
    """Add a random black rectangle to an image."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Get the color black for the rectangle.

        Args:
            img: The image being modified (unused).
            rect: The rectangle coordinates (unused).

        Returns:
            The RGB color black (0, 0, 0).
        """
        return 0, 0, 0
get_color(img, rect)

Get the color black for the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The RGB color black (0, 0, 0).

Source code in src/training/augmentation/random_rectangle.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get the color black for the rectangle.

    Args:
        img: The image being modified (unused).
        rect: The rectangle coordinates (unused).

    Returns:
        The RGB color black (0, 0, 0).
    """
    return 0, 0, 0

AddRandomRectangleAverageColor

Bases: BaseAddRandomRectangle

Add a random rectangle with the average color of the occluded region.

Source code in src/training/augmentation/random_rectangle.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class AddRandomRectangleAverageColor(BaseAddRandomRectangle):
    """Occlude a random region with a rectangle filled by that region's mean color."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Compute the mean RGB color of the region covered by the rectangle.

        Args:
            img: The image being modified.
            rect: The rectangle coordinates.

        Returns:
            The per-channel integer mean of the pixels inside the rectangle.
        """
        left, top, right, bottom = rect
        region_pixels = list(img.crop((left, top, right, bottom)).getdata())
        count = len(region_pixels)
        # Average each channel independently; any channel past the first
        # three (e.g. alpha) is ignored, matching an RGB fill color.
        channels = list(zip(*region_pixels))[:3]
        r, g, b = (sum(channel) // count for channel in channels)
        return (r, g, b)
get_color(img, rect)

Get the average color of the rectangle region in the image.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The average RGB color of the rectangle region.

Source code in src/training/augmentation/random_rectangle.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get the average color of the rectangle region in the image.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        The average RGB color of the rectangle region.
    """
    x1, y1, x2, y2 = rect
    cropped_area = img.crop((x1, y1, x2, y2))
    pixels = list(cropped_area.getdata())
    r = sum(p[0] for p in pixels) // len(pixels)
    g = sum(p[1] for p in pixels) // len(pixels)
    b = sum(p[2] for p in pixels) // len(pixels)
    return r, g, b

AddRandomRectangleRandomColor

Bases: BaseAddRandomRectangle

Add a random rectangle with a random color to an image.

Source code in src/training/augmentation/random_rectangle.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class AddRandomRectangleRandomColor(BaseAddRandomRectangle):
    """Occlude a random region with a rectangle filled by a uniformly random color."""

    @override
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Draw a uniformly random RGB color.

        Args:
            img: The image being modified (unused).
            rect: The rectangle coordinates (unused).

        Returns:
            A random RGB triple, each channel in [0, 255].
        """
        # Three sequential randint draws, one per channel (R, G, B).
        return (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255),
        )
get_color(img, rect)

Get a random color for the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

A random RGB color.

Source code in src/training/augmentation/random_rectangle.py
75
76
77
78
79
80
81
82
83
84
85
86
87
@override
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Get a random color for the rectangle.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        A random RGB color.
    """
    r, g, b = tuple(random.randint(0, 255) for _ in range(3))
    return r, g, b

BaseAddRandomRectangle

Bases: ABC

Base class for adding a random rectangle to an image.

Attributes:

Name Type Description
min_size

Minimum size of the rectangle.

max_size

Maximum size of the rectangle.

Source code in src/training/augmentation/random_rectangle.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class BaseAddRandomRectangle(ABC):
    """Base class for adding a random rectangle to an image.

    Subclasses choose the fill color by implementing ``get_color``.

    Attributes:
        min_size: Minimum size (in pixels) of the rectangle's sides.
        max_size: Maximum size (in pixels) of the rectangle's sides.
    """

    def __init__(self, min_size=10, max_size=50):
        """Initialize the base class with rectangle size constraints.

        Args:
            min_size: Minimum size of the rectangle.
            max_size: Maximum size of the rectangle.
        """
        self.min_size = min_size
        self.max_size = max_size

    def __call__(self, img):
        """Add a random rectangle to the given image (modified in place).

        Args:
            img: The image to modify.

        Returns:
            The modified image with a random rectangle.
        """
        draw = ImageDraw.Draw(img)
        width, height = img.size

        # Clamp the sampled side lengths to the image dimensions so that
        # random.randint never receives an empty range on images smaller
        # than the configured size bounds.
        max_w = min(self.max_size, width)
        max_h = min(self.max_size, height)
        rect_width = random.randint(min(self.min_size, max_w), max_w)
        rect_height = random.randint(min(self.min_size, max_h), max_h)
        x1 = random.randint(0, width - rect_width)
        y1 = random.randint(0, height - rect_height)
        rect = [x1, y1, x1 + rect_width, y1 + rect_height]

        color = self.get_color(img, rect)

        draw.rectangle(rect, fill=color)
        return img

    @abstractmethod
    def get_color(self, img, rect) -> tuple[int, int, int]:
        """Determine the color of the rectangle.

        Args:
            img: The image being modified.
            rect: The rectangle coordinates [x1, y1, x2, y2].

        Returns:
            The color of the rectangle in RGB format.
        """
__call__(img)

Add a random rectangle to the given image.

Parameters:

Name Type Description Default
img

The image to modify.

required

Returns:

Type Description

The modified image with a random rectangle.

Source code in src/training/augmentation/random_rectangle.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def __call__(self, img):
    """Add a random rectangle to the given image.

    Args:
        img: The image to modify.

    Returns:
        The modified image with a random rectangle.
    """
    pen = ImageDraw.Draw(img)
    img_w, img_h = img.size

    # Sample the rectangle's size first, then a position that keeps it
    # fully inside the image bounds.
    w = random.randint(self.min_size, self.max_size)
    h = random.randint(self.min_size, self.max_size)
    left = random.randint(0, img_w - w)
    top = random.randint(0, img_h - h)
    rect = [left, top, left + w, top + h]

    pen.rectangle(rect, fill=self.get_color(img, rect))
    return img
__init__(min_size=10, max_size=50)

Initialize the base class with rectangle size constraints.

Parameters:

Name Type Description Default
min_size

Minimum size of the rectangle.

10
max_size

Maximum size of the rectangle.

50
Source code in src/training/augmentation/random_rectangle.py
23
24
25
26
27
28
29
30
31
def __init__(self, min_size=10, max_size=50):
    """Initialize the base class with rectangle size constraints.

    Args:
        min_size: Minimum size of the rectangle.
        max_size: Maximum size of the rectangle.
    """
    # Inclusive bounds used when sampling rectangle side lengths.
    self.min_size, self.max_size = min_size, max_size
get_color(img, rect) abstractmethod

Abstract method to determine the color of the rectangle.

Parameters:

Name Type Description Default
img

The image being modified.

required
rect

The rectangle coordinates.

required

Returns:

Type Description
tuple[int, int, int]

The color of the rectangle in RGB format.

Source code in src/training/augmentation/random_rectangle.py
58
59
60
61
62
63
64
65
66
67
68
69
@abstractmethod
def get_color(self, img, rect) -> tuple[int, int, int]:
    """Determine the color of the rectangle.

    Concrete subclasses must override this hook.

    Args:
        img: The image being modified.
        rect: The rectangle coordinates.

    Returns:
        The color of the rectangle in RGB format.
    """
    ...

fine_tuning

count_correct(embeddings_1, embeddings_2, labels)

Count correct embedding pairs with respect to labels.

For each triple (e1, e2, label): label is 1 for the same person and -1 for different people; cosine similarity is used to determine if e1 and e2 are similar enough; similarity is in range [-1, 1], where 1 means very similar; if similarity >= 0, then e1 and e2 are considered similar (label 1); if similarity < 0, then e1 and e2 are considered different (label -1).

B - batch dimension; E - embedding dimension

Parameters:

Name Type Description Default
embeddings_1 Tensor

Embeddings of the first image in the pair, shape (B, E).

required
embeddings_2 Tensor

Embeddings of the second image in the pair, shape (B, E).

required
labels Tensor

Labels indicating if the pairs are from the same person (1) or different people (-1), shape (B,).

required
Source code in src/training/fine_tuning.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def count_correct(
    embeddings_1: Tensor,
    embeddings_2: Tensor,
    labels: Tensor,
) -> int:
    """Count correct embedding pairs with respect to labels.

    For each triple (e1, e2, label):
    - label is 1 for the same person and -1 for different people
    - cosine similarity is used to determine if e1 and e2 are similar enough
    - similarity is in range [-1, 1], where 1 means very similar
    - if similarity >= 0, then e1 and e2 are considered similar (label 1)
    - if similarity < 0, then e1 and e2 are considered different (label -1)

    B - batch dimension
    E - embedding dimension

    Args:
        embeddings_1 (Tensor): Embeddings of the first image in the pair, shape (B, E).
        embeddings_2 (Tensor): Embeddings of the second image in the pair, shape (B, E).
        labels (Tensor): Labels indicating if the pairs are from the same person (1) or different people (-1), shape (B,).

    Returns:
        The number of pairs whose predicted label matches the ground truth.
    """
    similarities = F.cosine_similarity(embeddings_1, embeddings_2)
    # Map {False, True} -> {0.0, 1.0} -> {-1.0, 1.0} to match the label encoding.
    predicted_labels = (similarities >= 0).float() * 2 - 1
    # .sum() of a boolean tensor yields an integer count, not a float,
    # so the return type is int (the previous annotation said float).
    return int((predicted_labels == labels).sum().item())

loss

ContrastiveLoss

Bases: Module

Source code in src/training/loss/contrastive_loss.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
class ContrastiveLoss(nn.Module):
    """Contrastive loss over embedding pairs.

    Pulls same-person embeddings together (penalizing their distance) and
    pushes different-person embeddings apart, up to the configured margin.
    """

    def __init__(self, margin=1.0):
        """Initialize the loss.

        Args:
            margin: Distance beyond which different-person pairs contribute zero loss.
        """
        super().__init__()  # modern zero-argument form
        self.margin = margin

    def forward(self, output1, output2, label):
        """Label is 0 for the same person, 1 for different person"""
        euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
        # Same-person pairs (label 0): squared distance.
        # Different-person pairs (label 1): squared shortfall from the margin (hinge).
        loss = torch.mean(
            (1 - label) * torch.pow(euclidean_distance, 2)
            + label
            * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)
        )
        return loss

forward(output1, output2, label)

Label is 0 for the same person, 1 for different person

Source code in src/training/loss/contrastive_loss.py
10
11
12
13
14
15
16
17
18
def forward(self, output1, output2, label):
    """Label is 0 for the same person, 1 for different person"""
    euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
    loss = torch.mean(
        (1 - label) * torch.pow(euclidean_distance, 2)
        + label
        * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)
    )
    return loss

contrastive_loss

ContrastiveLoss

Bases: Module

Source code in src/training/loss/contrastive_loss.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
class ContrastiveLoss(nn.Module):
    """Contrastive loss over embedding pairs.

    Pulls same-person embeddings together (penalizing their distance) and
    pushes different-person embeddings apart, up to the configured margin.
    """

    def __init__(self, margin=1.0):
        """Initialize the loss.

        Args:
            margin: Distance beyond which different-person pairs contribute zero loss.
        """
        super().__init__()  # modern zero-argument form
        self.margin = margin

    def forward(self, output1, output2, label):
        """Label is 0 for the same person, 1 for different person"""
        euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
        # Same-person pairs (label 0): squared distance.
        # Different-person pairs (label 1): squared shortfall from the margin (hinge).
        loss = torch.mean(
            (1 - label) * torch.pow(euclidean_distance, 2)
            + label
            * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)
        )
        return loss
forward(output1, output2, label)

Label is 0 for the same person, 1 for different person

Source code in src/training/loss/contrastive_loss.py
10
11
12
13
14
15
16
17
18
def forward(self, output1, output2, label):
    """Label is 0 for the same person, 1 for different person"""
    euclidean_distance = torch.nn.functional.pairwise_distance(output1, output2)
    loss = torch.mean(
        (1 - label) * torch.pow(euclidean_distance, 2)
        + label
        * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2)
    )
    return loss