import torch
from torchvision.models import alexnet


# Two random tensors stand in for face images (3 channels, 224x224).
face1 = torch.randn((3, 224, 224))
face2 = torch.randn((3, 224, 224))

model = alexnet(weights="AlexNet_Weights.DEFAULT")
# Remove the classification layer: with the last classifier module replaced
# by Identity, the network returns the penultimate-layer activations, which
# are used as the embedding. (The original chained assignment also created
# an unused `model.fc` attribute; it is dropped here.)
model.classifier[6] = torch.nn.Identity()
# AlexNet's classifier contains Dropout, so the model must be in eval mode,
# otherwise the embeddings (and the distance below) are random per call.
model.eval()

# get embeddings; autograd is not needed for inference
with torch.no_grad():
    embedding1 = model(face1.unsqueeze(0))  # unsqueeze adds the batch axis
    embedding2 = model(face2.unsqueeze(0))

diff = torch.nn.functional.pairwise_distance(embedding1, embedding2)
print("L2 distance: ", diff.item())

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:02<00:00, 108MB/s]

L2 distance:  28.585399627685547


from torch import nn

triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
anchor = torch.randn(100, 128, requires_grad=True)
positive = torch.randn(100, 128, requires_grad=True)
negative = torch.randn(100, 128, requires_grad=True)

loss = triplet_loss(anchor, positive, negative)
print(loss)

tensor(1.1361, grad_fn=<MeanBackward0>)


import torch.nn.functional as F

triplet_loss = nn.TripletMarginWithDistanceLoss(
    margin=1.0, distance_function=lambda x, y: 1.0 - F.cosine_similarity(x, y)
)
loss = triplet_loss(anchor, positive, negative)
print(loss)

tensor(1.0056, grad_fn=<MeanBackward0>)


!pip install -q lightning
!wget -qN https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/small_face_dataset.zip
!unzip -qn small_face_dataset.zip

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.0/2.0 MB 12.1 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 841.5/841.5 kB 17.0 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 801.9/801.9 kB 22.0 MB/s eta 0:00:00


import torch
import random
import numpy as np
import lightning as L


def set_random_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: integer seed applied to every generator.
    """
    # Pure-Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU and CUDA).
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Trade cuDNN autotuning for run-to-run reproducibility.
    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False


set_random_seed(42)
L.seed_everything(42)

INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42

42


from glob import glob
from PIL import Image
from torch.utils.data import Dataset


class SiameseNetworkDataset(Dataset):
    """Dataset that yields (anchor, positive, negative) image triplets.

    Images are the ``*.pgm`` files found recursively under ``dir``. The first
    path component below ``dir`` is treated as the person ID, e.g.
    ``faces/training/s1/1.pgm`` belongs to person ``'s1'``.

    Args:
        dir: root directory of the dataset (with or without a trailing slash).
        transform: optional callable applied to each of the three images.
    """

    def __init__(self, dir, transform=None):
        self.dir = dir
        self.transform = transform
        # List of paths to all images in self.dir. Sorted so that the
        # index -> file mapping does not depend on filesystem listing order.
        self.files = sorted(glob(f"{self.dir}/**/*.pgm", recursive=True))
        self.person_index = self.build_index()

    def build_index(self):
        """
        Creates a dictionary "person_index" with such structure:
        {'s1': ['faces/training/s1/1.pgm', ..., 'faces/training/s1/10.pgm'],
         's2': ...}

        keys   — IDs of persons ('s1', 's2', ...)
        values — lists of paths to person's images
        """

        person_index = {}
        for path in self.files:
            person_index.setdefault(self.path2id(path), []).append(path)
        return person_index

    def path2id(self, path):
        """
        Takes full path, for example 'faces/training/s1/1.pgm'
        and extracts ID from it, for example 's1'

        The ID is the first component of ``path`` relative to ``self.dir``,
        so the result does not depend on whether ``self.dir`` ends with a
        path separator.
        """
        import os  # local import keeps this block self-contained

        relative = os.path.relpath(path, self.dir)
        return relative.split(os.sep)[0]

    def __getitem__(self, index):
        """Return the triplet (anchor, positive, negative) for ``index``.

        The anchor is ``self.files[index]``; the positive and negative
        examples are drawn at random on every call.
        """
        anchor_path = self.files[index]
        # given anchor path we need to find positive and negative examples
        positive_path = self.find_positive(anchor_path)
        negative_path = self.find_negative(anchor_path)

        # Loading the images
        anchor = Image.open(anchor_path)
        positive = Image.open(positive_path)
        negative = Image.open(negative_path)

        if self.transform is not None:  # Apply image transformations
            anchor = self.transform(anchor)
            positive = self.transform(positive)
            negative = self.transform(negative)

        return anchor, positive, negative

    def find_positive(self, anchor_path):
        """
        Given an anchor_path this function:
        1) extracts person's ID from anchor_path
        2) returns path to random image of THIS person, excepting anchor

        Raises:
            IndexError: if the person has no image other than the anchor.
        """

        person_id = self.path2id(anchor_path)
        candidates = [p for p in self.person_index[person_id] if p != anchor_path]
        if not candidates:
            raise IndexError(
                f"person {person_id!r} has no image other than {anchor_path!r}"
            )
        return random.choice(candidates)

    def find_negative(self, anchor_path):
        """
        Given an anchor_path this function:
        1) extracts person's ID from anchor_path
        2) returns path to random image of randomly selected ANOTHER person

        Raises:
            IndexError: if the dataset contains no other person.
        """

        person_id = self.path2id(anchor_path)
        other_ids = [pid for pid in self.person_index if pid != person_id]
        if not other_ids:
            raise IndexError(
                f"dataset contains no person other than {person_id!r}"
            )
        selected_id = random.choice(other_ids)
        return random.choice(self.person_index[selected_id])

    def __len__(self):
        """Number of images (each image serves as an anchor once)."""
        return len(self.files)


import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader


# Apply augmentations on train data
transforms_train = transforms.Compose(
    [
        transforms.Resize((105, 105)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
)

transforms_test = transforms.Compose(
    [transforms.Resize((105, 105)), transforms.ToTensor()]
)

train_dataset = SiameseNetworkDataset("faces/training/", transform=transforms_train)
test_dataset = SiameseNetworkDataset("faces/testing/", transform=transforms_test)

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=1)


import matplotlib.pyplot as plt
from warnings import simplefilter

simplefilter("ignore", RuntimeWarning)

anchor, positive, negative = next(iter(train_loader))

# 8 - number of samples to visualise
anchor, positive, negative = anchor[:8], positive[:8], negative[:8]

# Show batch contents
concatenated = torch.cat((anchor, positive, negative), 0)
grid = torchvision.utils.make_grid(concatenated)

plt.axis("off")
plt.imshow(grid.permute(1, 2, 0).numpy())
plt.gcf().set_size_inches(20, 60)
plt.show()


!pip install -q timm

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.2/2.2 MB 15.0 MB/s eta 0:00:00


import timm


class SiameseNet(nn.Module):
    """ResNet-18 backbone that maps grayscale images to unit-norm embeddings.

    Args:
        embedding_dim: size of the embedding vector produced per image.
    """

    def __init__(self, embedding_dim=32):
        super().__init__()
        # The ImageNet classifier head is replaced by a linear layer with
        # embedding_dim outputs (num_classes controls the head size).
        backbone = timm.create_model(
            "resnet18.a1_in1k", num_classes=embedding_dim, pretrained=False
        )

        # Grayscale input: a single input channel, and a smaller 3x3 stem
        # kernel because the images are smaller than ImageNet ones.
        backbone.conv1 = nn.Conv2d(
            1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False
        )
        self.model = backbone

    def _embed(self, images):
        """Run the backbone on one batch and L2-normalize the result."""
        return torch.nn.functional.normalize(self.model(images))

    def forward(self, anchor, positive, negative):
        """Embed the three batches (in this order) with the shared backbone."""
        output_a = self._embed(anchor)
        output_p = self._embed(positive)
        output_n = self._embed(negative)
        return output_a, output_p, output_n


class LitSiamese(L.LightningModule):
    """Lightning wrapper that trains a triplet network with cosine distance.

    Args:
        model: module called as ``model(anchor, positive, negative)`` and
            returning the three corresponding embeddings.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

        # Triplet loss whose distance is 1 - cosine similarity.
        self.criterion = nn.TripletMarginWithDistanceLoss(
            distance_function=self._cosine_distance, margin=1.0
        )

    @staticmethod
    def _cosine_distance(x, y):
        """Cosine distance between two batches of vectors."""
        return 1.0 - F.cosine_similarity(x, y)

    def forward(self, anchor, positive, negative):
        """Delegate to the wrapped model."""
        return self.model(anchor, positive, negative)

    def training_step(self, batch, batch_idx):
        """Compute and log the triplet loss for one batch of triplets."""
        anchor, positive, negative = batch

        emb_a, emb_p, emb_n = self.model(anchor, positive, negative)
        loss = self.criterion(emb_a, emb_p, emb_n)

        self.log("train_loss", loss)

        return loss

    def configure_optimizers(self):
        """Adam over all parameters with learning rate 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)


!mkdir lightning_logs
!mkdir lightning_logs/SiameseNetwork


from lightning.pytorch.loggers import TensorBoardLogger

L.seed_everything(42)

embedding_dim = 32
model = LitSiamese(SiameseNet(embedding_dim))

logger = TensorBoardLogger(save_dir="lightning_logs", name="SiameseNetwork")
trainer = L.Trainer(max_epochs=20, logger=logger, log_every_n_steps=1)

trainer.fit(model, train_loader)

INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name      | Type                          | Params
------------------------------------------------------------
0 | model     | SiameseNet                    | 11.2 M
1 | criterion | TripletMarginWithDistanceLoss | 0     
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.736    Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name      | Type                          | Params
------------------------------------------------------------
0 | model     | SiameseNet                    | 11.2 M
1 | criterion | TripletMarginWithDistanceLoss | 0     
------------------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.736    Total estimated model params size (MB)

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=20` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


# Helper method for visualization
def show(img, text=None):
    """Display a [CxHxW] image tensor with a bold caption.

    Args:
        img: image tensor in channel-first layout.
        text: caption placed at position (75, 120) of the plot.
    """
    # imshow expects [HxWxC], so move the channel axis to the end.
    hwc = np.transpose(img.cpu().numpy(), (1, 2, 0))
    plt.axis("off")
    plt.text(75, 120, text, fontweight="bold")
    plt.imshow(hwc)
    plt.show()


def plot_imgs(model, test_loader):
    """Evaluate a triplet model and show every fifth batch.

    For every batch of (anchor, positive, negative) images the cosine
    similarity anchor/positive and anchor/negative is computed. For every
    fifth batch the first triplet is displayed together with both
    similarities and an "OK"/"BAD" verdict (OK when the negative is less
    similar to the anchor than the positive).

    Works for any batch size; inputs are moved to the device of the model's
    parameters. The model is left in eval mode.

    Args:
        model: module called as ``model(anchor, positive, negative)``.
        test_loader: iterable yielding (anchor, positive, negative) batches.

    Returns:
        Tuple ``(similarity_pos, similarity_neg)`` of lists of floats, one
        entry per sample.
    """
    similarity_pos = []
    similarity_neg = []
    model.eval()

    # Run on whatever device the model lives on (CPU if it has no parameters).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    with torch.no_grad():
        for i, batch in enumerate(test_loader):
            anc, pos, neg = (t.to(device) for t in batch)
            output_a, output_p, output_n = model(anc, pos, neg)

            # compute Cosine Similarity (one value per sample in the batch)
            sim_pos = F.cosine_similarity(output_a, output_p).tolist()
            sim_neg = F.cosine_similarity(output_a, output_n).tolist()

            similarity_pos.extend(sim_pos)
            similarity_neg.extend(sim_neg)

            if i % 5 == 0:
                # Visualise only the first triplet of the batch.
                concatenated = torch.cat((anc[:1], pos[:1], neg[:1]))
                result = "OK" if sim_neg[0] < sim_pos[0] else "BAD"
                show(
                    torchvision.utils.make_grid(concatenated),
                    f"Positive / negative similarities: {sim_pos[0]:.3f} / {sim_neg[0]:.3f} - {result}",
                )

    return similarity_pos, similarity_neg


set_random_seed(42)
similarity_pos, similarity_neg = plot_imgs(model, test_loader)


import seaborn as sns

similarities = {"The same person": similarity_pos, "Another person": similarity_neg}

ax = sns.histplot(similarities, bins=20)
ax.set(xlabel="Pairwise similarity")
plt.show()


import torch.nn as nn


class Encoder(nn.Module):
    def __init__(self, latent_dim):
        super().__init__()

        hidden_dims = [32, 64, 128, 256]  # num of filters in layers
        modules = []
        in_channels = 1  # initial value of channels
        for h_dim in hidden_dims:  # conv layers
            modules.append(
                nn.Sequential(
                    nn.Conv2d(
                        in_channels=in_channels,  # num of input channels
                        out_channels=h_dim,  # num of output channels
                        kernel_size=3,
                        stride=2,  # convolution kernel step
                        padding=1,  # save shape
                    ),
                    nn.BatchNorm2d(h_dim),
                    nn.LeakyReLU(),
                )
            )
            in_channels = h_dim  # changing number of input channels for next iteration

        modules.append(nn.Flatten())  # to vector, size 256 * 2 * 2 = 1024
        modules.append(nn.Linear(256 * 2 * 2, latent_dim))

        self.encoder = nn.Sequential(*modules)

    def forward(self, x):
        x = self.encoder(x)
        return x


class Decoder(nn.Module):
    """Upsampling decoder: latent vector -> 1-channel image logits.

    A linear layer expands the latent vector to 1024 values, which are
    reshaped to (256, 2, 2) and passed through upsample + convolution stages.

    Args:
        latent_dim: size of the input latent vector.
    """

    def __init__(self, latent_dim):
        super().__init__()

        widths = [256, 128, 64, 32]  # num of filters in layers
        self.linear = nn.Linear(in_features=latent_dim, out_features=1024)

        def up_block(c_in, c_out):
            # double the spatial size, then refine with a shape-preserving conv
            return nn.Sequential(
                nn.Upsample(scale_factor=2),
                nn.Conv2d(
                    in_channels=c_in,
                    out_channels=c_out,
                    kernel_size=3,
                    padding=1,
                ),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(),
            )

        stages = [up_block(c_in, c_out) for c_in, c_out in zip(widths, widths[1:])]

        # Output stage: the unpadded 5x5 conv trims 4 pixels per dimension
        # and produces a single channel of logits.
        stages.append(
            nn.Sequential(
                nn.Upsample(scale_factor=2),
                nn.Conv2d(in_channels=widths[-1], out_channels=1, kernel_size=5),
            )
        )

        self.decoder = nn.Sequential(*stages)

    def forward(self, x, with_sigmoid=False):
        """Decode latent vectors.

        Args:
            x: batch of latent vectors.
            with_sigmoid: when true, apply a sigmoid to the output logits.
        """
        hidden = self.linear(x)  # from latents space to Linear
        hidden = hidden.view(-1, 256, 2, 2)  # reshape
        out = self.decoder(hidden)  # reconstruction
        return F.sigmoid(out) if with_sigmoid else out


from torchsummary import summary

latent_dim = 2

encoder = Encoder(latent_dim=latent_dim)
print(">>> Encoder")
print(summary(encoder, (1, 28, 28), device="cpu"))

decoder = Decoder(latent_dim=latent_dim)
print(">>> Decoder")
print(summary(decoder, (1, latent_dim), device="cpu"))

>>> Encoder
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 32, 14, 14]             320
       BatchNorm2d-2           [-1, 32, 14, 14]              64
         LeakyReLU-3           [-1, 32, 14, 14]               0
            Conv2d-4             [-1, 64, 7, 7]          18,496
       BatchNorm2d-5             [-1, 64, 7, 7]             128
         LeakyReLU-6             [-1, 64, 7, 7]               0
            Conv2d-7            [-1, 128, 4, 4]          73,856
       BatchNorm2d-8            [-1, 128, 4, 4]             256
         LeakyReLU-9            [-1, 128, 4, 4]               0
           Conv2d-10            [-1, 256, 2, 2]         295,168
      BatchNorm2d-11            [-1, 256, 2, 2]             512
        LeakyReLU-12            [-1, 256, 2, 2]               0
          Flatten-13                 [-1, 1024]               0
           Linear-14                    [-1, 2]           2,050
================================================================
Total params: 390,850
Trainable params: 390,850
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.29
Params size (MB): 1.49
Estimated Total Size (MB): 1.79
----------------------------------------------------------------
None
>>> Decoder
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Linear-1              [-1, 1, 1024]           3,072
          Upsample-2            [-1, 256, 4, 4]               0
            Conv2d-3            [-1, 128, 4, 4]         295,040
       BatchNorm2d-4            [-1, 128, 4, 4]             256
         LeakyReLU-5            [-1, 128, 4, 4]               0
          Upsample-6            [-1, 128, 8, 8]               0
            Conv2d-7             [-1, 64, 8, 8]          73,792
       BatchNorm2d-8             [-1, 64, 8, 8]             128
         LeakyReLU-9             [-1, 64, 8, 8]               0
         Upsample-10           [-1, 64, 16, 16]               0
           Conv2d-11           [-1, 32, 16, 16]          18,464
      BatchNorm2d-12           [-1, 32, 16, 16]              64
        LeakyReLU-13           [-1, 32, 16, 16]               0
         Upsample-14           [-1, 32, 32, 32]               0
           Conv2d-15            [-1, 1, 28, 28]             801
================================================================
Total params: 391,617
Trainable params: 391,617
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.81
Params size (MB): 1.49
Estimated Total Size (MB): 2.30
----------------------------------------------------------------
None


!pip install -q lightning


import lightning as L
import torch.nn.functional as F
from collections import defaultdict


class LitAE(L.LightningModule):
    """Lightning module that trains and tests an encoder/decoder autoencoder.

    After a test run ``self.test_result`` holds NumPy arrays under the keys
    "real", "recon", "latent" and "labels", accumulated over all test batches.

    Args:
        encoder: module mapping data to latent vectors.
        decoder: module mapping latent vectors to logits; must accept the
            keyword ``with_sigmoid``.
    """

    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def configure_optimizers(self):
        """Adam over all parameters with learning rate 1e-4."""
        return torch.optim.Adam(self.parameters(), lr=1e-4)

    def training_step(self, batch, batch_idx):
        """Reconstruct the batch and return the BCE-with-logits loss."""
        data, _ = batch

        # data -> latent -> reconstruction logits
        recon = self.decoder(self.encoder(data))

        # the decoder outputs logits, hence the *_with_logits loss
        loss = F.binary_cross_entropy_with_logits(recon, data)

        self.log("train_loss", loss)
        return loss

    def on_test_epoch_start(self):
        """Reset the accumulator; missing keys start as empty tensors."""
        self.test_result = defaultdict(torch.Tensor)

    def test_step(self, batch, batch_idx):
        """Encode and decode one test batch and store the results."""
        data, labels = batch

        latent = self.encoder(data)
        recon = self.decoder(latent, with_sigmoid=True)

        self.update_test_result(data, recon, latent, labels)

    def update_test_result(self, data, recon, latent, labels):
        """Append one batch (moved to CPU) to every entry of ``test_result``."""
        batch_items = (
            ("real", data),
            ("recon", recon),
            ("latent", latent),
            ("labels", labels),
        )
        for key, tensor in batch_items:
            self.test_result[key] = torch.cat([self.test_result[key], tensor.cpu()])

    def on_test_epoch_end(self):
        """Convert every accumulated tensor to a NumPy array."""
        for key, value in self.test_result.items():
            self.test_result[key] = value.numpy()


import torchvision
from torchvision.datasets import MNIST
from IPython.display import clear_output

root = "./data"

train_set = MNIST(
    root=root, train=True, transform=torchvision.transforms.ToTensor(), download=True
)
test_set = MNIST(
    root=root, train=False, transform=torchvision.transforms.ToTensor(), download=True
)
clear_output()


import torch

torch.manual_seed(42)

batch_size = 64
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, num_workers=2
)

test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=False, num_workers=2
)


!mkdir lightning_logs/AE_latent_dim=2


from lightning.pytorch.loggers import TensorBoardLogger

L.seed_everything(42)

latent_dim = 2
encoder = Encoder(latent_dim=latent_dim)
decoder = Decoder(latent_dim=latent_dim)

autoencoder = LitAE(encoder, decoder)

logger = TensorBoardLogger(save_dir="lightning_logs", name="AE_latent_dim=2")
trainer = L.Trainer(max_epochs=5, logger=logger)

trainer.fit(autoencoder, train_loader)

INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 390 K 
1 | decoder | Decoder | 391 K 
------------------------------------
782 K     Trainable params
0         Non-trainable params
782 K     Total params
3.130     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 390 K 
1 | decoder | Decoder | 391 K 
------------------------------------
782 K     Trainable params
0         Non-trainable params
782 K     Total params
3.130     Total estimated model params size (MB)

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=5` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


trainer.test(autoencoder, test_loader)
run_res = autoencoder.test_result

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

Testing: |          | 0/? [00:00<?, ?it/s]


import numpy as np
import matplotlib.pyplot as plt


def plot_samples(*args, digit_size=28, name=None, single_size=2):
    """Plot several batches of square images as rows of one tiled figure.

    Each positional argument is one row; column ``i`` shows sample ``i`` of
    every batch. Only the first ``n`` samples are shown, where ``n`` is the
    size of the smallest batch.

    Args:
        *args: array-likes that can be reshaped to
            ``(-1, digit_size, digit_size)``.
        digit_size: side length of a single image in pixels.
        name: if given, the figure is also saved to this path.
        single_size: size of one tile of the figure, in inches.

    Raises:
        ValueError: if no batches are passed.
    """
    if not args:
        raise ValueError("plot_samples needs at least one batch of images")

    # Reshape rather than squeeze: squeeze would also drop the batch axis of a
    # single-sample batch, after which shape[0] is the image height, not n.
    batches = [np.asarray(x).reshape(-1, digit_size, digit_size) for x in args]
    n = min(batch.shape[0] for batch in batches)
    figure = np.zeros((digit_size * len(batches), digit_size * n))

    for row, batch in enumerate(batches):
        for col in range(n):
            figure[
                row * digit_size : (row + 1) * digit_size,
                col * digit_size : (col + 1) * digit_size,
            ] = batch[col]

    plt.figure(figsize=(single_size * n, single_size * len(batches)))

    plt.imshow(figure, cmap="gray_r", clim=(0, 1))

    plt.grid(False)
    ax = plt.gca()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    if name is not None:
        plt.savefig(name)
    plt.show()


plot_samples(run_res["real"][0:9], run_res["recon"][0:9])


def plot_manifold(latent_r, labels=None, alpha=0.9, title=None):
    """Scatter-plot the first two columns of a latent representation.

    Args:
        latent_r: 2-D array; column 0 is the x axis, column 1 the y axis.
        labels: optional per-point values used for colouring ("tab10"
            colormap, with a colorbar).
        alpha: marker opacity.
        title: optional figure title.
    """
    plt.figure(figsize=(8, 8))
    xs, ys = latent_r[:, 0], latent_r[:, 1]

    if labels is not None:
        plt.scatter(xs, ys, c=labels, cmap="tab10", alpha=alpha)
        plt.colorbar()
    else:
        plt.scatter(xs, ys, alpha=alpha)

    if title:
        plt.title(title)
    plt.show()


plot_manifold(run_res["latent"], run_res["labels"], title="AE manifold (latent_dim=2)")


!mkdir lightning_logs/AE_latent_dim=24


L.seed_everything(42)

latent_dim = 24
encoder = Encoder(latent_dim=latent_dim)
decoder = Decoder(latent_dim=latent_dim)

autoencoder = LitAE(encoder, decoder)

logger = TensorBoardLogger(save_dir="lightning_logs", name="AE_latent_dim=24")
trainer = L.Trainer(max_epochs=5, logger=logger)

trainer.fit(autoencoder, train_loader)

INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 413 K 
1 | decoder | Decoder | 414 K 
------------------------------------
827 K     Trainable params
0         Non-trainable params
827 K     Total params
3.310     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type    | Params
------------------------------------
0 | encoder | Encoder | 413 K 
1 | decoder | Decoder | 414 K 
------------------------------------
827 K     Trainable params
0         Non-trainable params
827 K     Total params
3.310     Total estimated model params size (MB)

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=5` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


trainer.test(autoencoder, test_loader)
run_res = autoencoder.test_result
plot_samples(run_res["real"][0:9], run_res["recon"][0:9])

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

Testing: |          | 0/? [00:00<?, ?it/s]


class AddGaussianNoise:
    """Transform that adds Gaussian noise ``N(mean, std^2)`` to a tensor.

    Args:
        mean: mean of the noise.
        std: standard deviation of the noise.
    """

    def __init__(self, mean=0.0, std=1.0):
        self.std = std
        self.mean = mean

    def __call__(self, tensor):
        """Return ``tensor`` plus freshly sampled noise of the same shape."""
        # Sample the noise on the tensor's device and, for floating-point
        # inputs, in the tensor's dtype, so the transform neither fails on
        # non-CPU tensors nor silently changes the dtype. Non-float inputs
        # get noise in the default float dtype.
        if tensor.is_floating_point():
            noise_dtype = tensor.dtype
        else:
            noise_dtype = torch.get_default_dtype()
        noise = torch.randn(tensor.shape, dtype=noise_dtype, device=tensor.device)
        return tensor + noise * self.std + self.mean

    def __repr__(self):
        return f"{self.__class__.__name__}(mean={self.mean}, std={self.std})"


torch.manual_seed(42)

test_noise_set = MNIST(
    root=root,
    train=False,
    transform=torchvision.transforms.Compose(
        [torchvision.transforms.ToTensor(), AddGaussianNoise(0.0, 0.10)]
    ),
    download=True,
)

test_noised_loader = torch.utils.data.DataLoader(
    torch.utils.data.Subset(test_noise_set, list(range(64))),
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)


trainer.test(autoencoder, test_noised_loader)
run_res = autoencoder.test_result

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

Testing: |          | 0/? [00:00<?, ?it/s]


plot_samples(run_res["real"][0:9], run_res["recon"][0:9])


# Pick one image of a "7" and one of a "6", scale pixel values to [0, 1]
# and add the leading axis so each becomes a batch of one 1-channel image.
seven = (test_set.data[test_set.targets == 7][2:3] / 255).unsqueeze(0)
six = (test_set.data[test_set.targets == 6][1:2] / 255).unsqueeze(0)

# These are single-image batches, so BatchNorm must use its running
# statistics: switch to eval mode explicitly instead of relying on whatever
# mode the trainer left the module in. Gradients are not needed either.
autoencoder.eval()
with torch.no_grad():
    latent_space1 = autoencoder.encoder(seven)
    latent_space2 = autoencoder.encoder(six)


interp_steps = 10
weight = torch.linspace(0, 1, steps=interp_steps)
interp = torch.lerp(
    latent_space1.repeat(interp_steps, 1),
    latent_space2.repeat(interp_steps, 1),
    weight=weight.view(-1, 1),
)
iterp_imgs = autoencoder.decoder(interp, with_sigmoid=True)


_, axs = plt.subplots(nrows=1, ncols=interp_steps, figsize=(16, 4))
for step in range(0, interp_steps):
    figure = iterp_imgs[step].cpu().detach().numpy()
    figure = figure.reshape(28, 28)
    ax = axs[step]
    ax.imshow(figure, cmap="gray_r", clim=(0, 1))
    ax.grid(False)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False);


from PIL import Image

interp_steps = 200
weight = torch.linspace(0, 1, steps=interp_steps)
interp = torch.lerp(
    latent_space1.repeat(interp_steps, 1),
    latent_space2.repeat(interp_steps, 1),
    weight=weight.view(-1, 1),
)
iterp_imgs = autoencoder.decoder(interp, with_sigmoid=True)

resize_coeff = 10
imgs = np.squeeze(iterp_imgs.cpu().detach().numpy())
size = (imgs.shape[1] * resize_coeff, imgs.shape[2] * resize_coeff)


imgs = [
    Image.fromarray(np.uint8(255 - img * 255)).resize(size).convert("RGB")
    for img in imgs
]
imgs[0].save(
    "ae_img.gif",
    save_all=True,
    append_images=imgs[1:],
    optimize=False,
    duration=40,
    loop=0,
)


from IPython.display import Image as iImage

iImage(open("ae_img.gif", "rb").read())


class VAEEncoder(Encoder):
    """Encoder whose output is later split into two equal halves.

    Args:
        latent_dim: total size of the encoder output; must be even.

    Raises:
        ValueError: if ``latent_dim`` is odd.
    """

    def __init__(self, latent_dim):
        if latent_dim % 2 != 0:  # check for the parity of the latent space
            # ValueError is still caught by callers using `except Exception`.
            raise ValueError("Latent size for VAEEncoder must be even")

        super().__init__(latent_dim)


import lightning as L
import torch.nn.functional as F


class LitVAE(LitAE):
    """Variational autoencoder built on top of ``LitAE``.

    The encoder output is split into ``mu`` and ``log_var``; the decoder
    receives a sample drawn with the reparametrization trick. The training
    loss is ``recon_weight * BCE + kld_weight * KL``.

    Args:
        encoder: module whose output has an even number of features.
        decoder: module decoding a sample of half that size.
        kld_weight: weight of the KL-divergence term.
        recon_weight: weight of the reconstruction term.
    """

    def __init__(self, encoder, decoder, kld_weight=0.005, recon_weight=1.0):
        super().__init__(encoder, decoder)
        self.kld_weight = kld_weight
        self.recon_weight = recon_weight

    def vae_split(self, latent):
        """Split the latent representation column-wise into (mu, log_var)."""
        size = latent.shape[1] // 2
        mu = latent[:, :size]
        log_var = latent[:, size:]
        return mu, log_var

    def vae_reparametrize(self, mu, log_var):
        """Draw ``mu + sigma * eps`` with ``eps ~ N(0, I)``.

        The noise is created with the shape, dtype and device of ``mu``, so
        the method works for tensors on any device, independently of where
        the module itself currently lives.
        """
        sigma = torch.exp(0.5 * log_var)
        eps = torch.randn_like(mu)
        return eps * sigma + mu

    def kld_loss(self, mu, log_var):
        """KL divergence between N(mu, var) and N(0, I), averaged over the batch."""
        var = log_var.exp()
        kl_loss = torch.mean(-0.5 * torch.sum(log_var - var - mu**2 + 1, dim=1), dim=0)
        return kl_loss

    def _encode_and_sample(self, data):
        """Encode ``data`` and return (latent, mu, log_var, sample)."""
        latent = self.encoder(data)
        mu, log_var = self.vae_split(latent)
        sample = self.vae_reparametrize(mu, log_var)
        return latent, mu, log_var, sample

    def training_step(self, batch, batch_idx):
        """Return the weighted sum of reconstruction and KL losses."""
        data, labels = batch

        # here is the logic how data is moved through VAE
        _, mu, log_var, sample = self._encode_and_sample(data)
        recon = self.decoder(sample)

        # here is the loss function computing
        loss = self.recon_weight * F.binary_cross_entropy_with_logits(
            recon, data
        ) + self.kld_weight * self.kld_loss(mu, log_var)

        self.log("train_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        """Reconstruct one test batch from a sampled latent and store results.

        Note that the stored "latent" entry is the raw encoder output
        (mu and log_var side by side), not the sample.
        """
        data, labels = batch

        # here is the logic how data is moved through VAE
        latent, _, _, sample = self._encode_and_sample(data)
        recon = self.decoder(sample, with_sigmoid=True)

        self.update_test_result(data, recon, latent, labels)


!mkdir lightning_logs/VAE_latent_dim=2_KL_only


L.seed_everything(42)

latent_dim = 2
encoder = VAEEncoder(latent_dim=latent_dim * 2)
decoder = Decoder(latent_dim=latent_dim)

autoencoder = LitVAE(encoder, decoder, kld_weight=0.1, recon_weight=0.0)  # bad practice

logger = TensorBoardLogger(save_dir="lightning_logs", name="VAE_latent_dim=2_KL_only")
trainer = L.Trainer(max_epochs=5, logger=logger)

trainer.fit(autoencoder, train_loader)

INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name    | Type       | Params
---------------------------------------
0 | encoder | VAEEncoder | 392 K 
1 | decoder | Decoder    | 391 K 
---------------------------------------
784 K     Trainable params
0         Non-trainable params
784 K     Total params
3.138     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | encoder | VAEEncoder | 392 K 
1 | decoder | Decoder    | 391 K 
---------------------------------------
784 K     Trainable params
0         Non-trainable params
784 K     Total params
3.138     Total estimated model params size (MB)

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=5` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


# Run the test loop, then read back what the module stored in `test_result`.
trainer.test(autoencoder, test_loader)
run_res = autoencoder.test_result

# Split the stored "latent" entry into its two parts (named mu / log_var here)
# and turn the log-variance into a variance.
mu, log_var = autoencoder.vae_split(run_res["latent"])
var = np.exp(log_var)

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

Testing: |          | 0/? [00:00<?, ?it/s]


# Side-by-side histograms of all predicted means and variances.
plt.figure(figsize=(15, 5))

plt.subplot(1, 2, 1)
plt.hist(mu.ravel())
# Raw string: "\m" is an invalid escape sequence in a regular string literal
# (DeprecationWarning / SyntaxWarning on recent Python versions).
plt.xlabel(r"$\mu$", fontsize=20)

plt.subplot(1, 2, 2)
plt.hist(var.ravel())
# Raw string for the same reason ("\s").
plt.xlabel(r"$\sigma^2$", fontsize=20)

plt.show()


import seaborn as sns

sns.set_style("whitegrid")

# Convert to tensors so the module's own reparametrization helper can be used,
# then go back to NumPy for plotting.
mu, log_var = torch.tensor(mu), torch.tensor(log_var)
sample = autoencoder.vae_reparametrize(mu, log_var).numpy()

# NOTE(review): `pal` is not passed to anything in this cell — presumably
# plot_manifold reads it as a global; verify.
pal = sns.color_palette("Paired", n_colors=10)
# NOTE(review): the title says "mu", but the plotted points are `sample`
# (the output of vae_reparametrize), not `mu` — confirm which one is intended.
plot_manifold(sample, run_res["labels"], title="Manifold mu")


# -p: create missing parents and do not fail if the directory already exists
# (plain mkdir errors with "File exists" when the notebook is re-run).
!mkdir -p lightning_logs/VAE_latent_dim=2


# Seed first: the statements below are order-sensitive with respect to the RNG.
L.seed_everything(42)

latent_dim = 2
# The encoder is built with twice the latent size; its output is later split
# into two parts (mu, log_var) with vae_split.
encoder = VAEEncoder(latent_dim=latent_dim * 2)
decoder = Decoder(latent_dim=latent_dim)

# Both loss terms are active here: reconstruction at 1.0, KL at 0.005.
autoencoder = LitVAE(encoder, decoder, kld_weight=0.005, recon_weight=1.0)

logger = TensorBoardLogger(save_dir="lightning_logs", name="VAE_latent_dim=2")
trainer = L.Trainer(max_epochs=5, logger=logger)

trainer.fit(autoencoder, train_loader)

INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name    | Type       | Params
---------------------------------------
0 | encoder | VAEEncoder | 392 K 
1 | decoder | Decoder    | 391 K 
---------------------------------------
784 K     Trainable params
0         Non-trainable params
784 K     Total params
3.138     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | encoder | VAEEncoder | 392 K 
1 | decoder | Decoder    | 391 K 
---------------------------------------
784 K     Trainable params
0         Non-trainable params
784 K     Total params
3.138     Total estimated model params size (MB)

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=5` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


# Evaluate on the test loader and fetch what the module stored.
trainer.test(autoencoder, test_loader)
run_res = autoencoder.test_result

# Split the stored latent entry, convert both parts to tensors in one go and
# draw one reparametrized sample per test point.
mu, log_var = map(torch.tensor, autoencoder.vae_split(run_res["latent"]))
sample = autoencoder.vae_reparametrize(mu, log_var).numpy()

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

Testing: |          | 0/? [00:00<?, ?it/s]


# NOTE(review): `pal` is not passed to anything in this cell — presumably
# plot_manifold reads it as a global; verify.
pal = sns.color_palette("Paired", n_colors=10)
# Scatter the sampled latent points, coloured by their labels.
plot_manifold(sample, run_res["labels"])


# -p: create missing parents and do not fail if the directory already exists
# (plain mkdir errors with "File exists" when the notebook is re-run).
!mkdir -p lightning_logs/VAE_latent_dim=24


# Seed first: the statements below are order-sensitive with respect to the RNG.
L.seed_everything(42)

latent_dim = 24
# The encoder is built with twice the latent size; its output is later split
# into two parts (mu, log_var) with vae_split.
encoder = VAEEncoder(latent_dim=latent_dim * 2)
decoder = Decoder(latent_dim=latent_dim)

# No explicit loss weights: LitVAE's defaults apply (not visible in this cell).
autoencoder = LitVAE(encoder, decoder)

logger = TensorBoardLogger(save_dir="lightning_logs", name="VAE_latent_dim=24")
trainer = L.Trainer(max_epochs=5, logger=logger)

trainer.fit(autoencoder, train_loader)

INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name    | Type       | Params
---------------------------------------
0 | encoder | VAEEncoder | 438 K 
1 | decoder | Decoder    | 414 K 
---------------------------------------
852 K     Trainable params
0         Non-trainable params
852 K     Total params
3.409     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | encoder | VAEEncoder | 438 K 
1 | decoder | Decoder    | 414 K 
---------------------------------------
852 K     Trainable params
0         Non-trainable params
852 K     Total params
3.409     Total estimated model params size (MB)

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=5` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


# Pick one image labelled 7 and one labelled 6 and divide by 255
# (presumably mapping uint8 pixels into [0, 1] — TODO confirm dtype).
# assumes test_set.data is (N, H, W): the length-1 slice keeps the first axis
# and unsqueeze(0) adds one more leading axis — TODO confirm.
seven = (test_set.data[test_set.targets == 7][2:3] / 255).unsqueeze(0)
six = (test_set.data[test_set.targets == 6][1:2] / 255).unsqueeze(0)

# Keep only the first part returned by vae_split (the mean); the second part
# is discarded.
latent_space1_mu, _ = autoencoder.vae_split(autoencoder.encoder(seven))
latent_space2_mu, _ = autoencoder.vae_split(autoencoder.encoder(six))


# Decode evenly spaced points on the segment between the two latent means.
interp_steps = 10
weight = torch.linspace(0, 1, steps=interp_steps)
interp_start = latent_space1_mu.repeat(interp_steps, 1)
interp_end = latent_space2_mu.repeat(interp_steps, 1)
interp = torch.lerp(interp_start, interp_end, weight=weight.view(-1, 1))
iterp_imgs = autoencoder.decoder(interp, with_sigmoid=True)

# One subplot per interpolation step, drawn without grid or axes.
_, axs = plt.subplots(nrows=1, ncols=interp_steps, figsize=(16, 4))
for ax, decoded in zip(axs, iterp_imgs):
    figure = decoded.cpu().detach().numpy().reshape(28, 28)
    ax.imshow(figure, cmap="gray_r", clim=(0, 1))
    ax.grid(False)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)


from PIL import Image

# Same interpolation between the two latent means, but with 200 steps; the
# decoded frames are reused below to build an animation.
interp_steps = 200
weight = torch.linspace(0, 1, steps=interp_steps)
# Repeat both endpoints once per step and blend them row by row with the
# per-step weight (column vector of shape (interp_steps, 1)).
interp = torch.lerp(
    latent_space1_mu.repeat(interp_steps, 1),
    latent_space2_mu.repeat(interp_steps, 1),
    weight=weight.view(-1, 1),
)
iterp_imgs = autoencoder.decoder(interp, with_sigmoid=True)


resize_coeff = 10
# Drop singleton axes; assumes the decoder output is (frames, 1, H, W), which
# leaves (frames, H, W) — TODO confirm.
imgs = np.squeeze(iterp_imgs.cpu().detach().numpy())
# PIL expects size as (width, height) while the array axes are
# (frames, rows, cols): width comes from axis 2, height from axis 1.
# (The previous (axis 1, axis 2) order was only correct for square frames.)
size = (imgs.shape[2] * resize_coeff, imgs.shape[1] * resize_coeff)


# Invert intensities (255 - x), upscale each frame and convert it to RGB.
imgs = [
    Image.fromarray(np.uint8(255 - img * 255)).resize(size).convert("RGB")
    for img in imgs
]
# Write all frames into one animated GIF: 40 ms per frame, loop=0 repeats
# the animation indefinitely.
imgs[0].save(
    "vae_img.gif",
    save_all=True,
    append_images=imgs[1:],
    optimize=False,
    duration=40,
    loop=0,
)


from IPython.display import Image as iImage

# Read the GIF through a context manager so the file handle is closed right
# away instead of being left open until garbage collection.
with open("vae_img.gif", "rb") as gif_file:
    gif_bytes = gif_file.read()

# Last expression of the cell, so the notebook renders the animation inline.
iImage(gif_bytes)


# Run the test loop on the noised loader and read back the stored results.
trainer.test(autoencoder, test_noised_loader)
run_res = autoencoder.test_result

# Show the first nine "real" entries together with their "recon" counterparts.
plot_samples(run_res["real"][0:9], run_res["recon"][0:9])

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

Testing: |          | 0/? [00:00<?, ?it/s]


# Run the test loop on the regular test loader and read back the stored results.
trainer.test(autoencoder, test_loader)
run_res = autoencoder.test_result

# Show the first nine "real" entries together with their "recon" counterparts.
plot_samples(run_res["real"][0:9], run_res["recon"][0:9])

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

Testing: |          | 0/? [00:00<?, ?it/s]


# Take one test batch and hand-pick three digits from it; the length-1 slices
# keep the batch axis.
imgs, labels = next(iter(test_loader))
nines = imgs[labels == 9]
ones = imgs[labels == 1]
real_9_straight = nines[6:7]  # find some straight "nine"
real_1_straight = ones[3:4]  # find some straight "one"
real_1_tilted = ones[0:1]  # find some tilted "one"


# Inverted-grayscale preview of the straight "nine".
size = (256, 256)
pixels_9_straight = np.squeeze(real_9_straight.numpy())
Image.fromarray(np.uint8(255 - pixels_9_straight * 255)).resize(size)


# Inverted-grayscale preview of the straight "one".
pixels_1_straight = np.squeeze(real_1_straight.numpy())
Image.fromarray(np.uint8(255 - pixels_1_straight * 255)).resize(size)


# Inverted-grayscale preview of the tilted "one".
pixels_1_tilted = np.squeeze(real_1_tilted.numpy())
Image.fromarray(np.uint8(255 - pixels_1_tilted * 255)).resize(size)


# Encode each digit and keep only the first part returned by vae_split.
encoded_9_straight = autoencoder.encoder(real_9_straight)
latent_9_straight, _ = autoencoder.vae_split(encoded_9_straight)
encoded_1_straight = autoencoder.encoder(real_1_straight)
latent_1_straight, _ = autoencoder.vae_split(encoded_1_straight)
encoded_1_tilted = autoencoder.encoder(real_1_tilted)
latent_1_tilted, _ = autoencoder.vae_split(encoded_1_tilted)


# "straight nine" - "straight one" + "tilted one", then decode the result.
latent_9_tilted = latent_9_straight - latent_1_straight + latent_1_tilted
gen_9_tilted = autoencoder.decoder(latent_9_tilted, with_sigmoid=True)


# Preview of the generated digit, rendered the same way as the inputs.
pixels_9_tilted = np.squeeze(gen_9_tilted.cpu().detach().numpy())
Image.fromarray(np.uint8(255 - pixels_9_tilted * 255)).resize(size)


class CDecoder(Decoder):
    """Decoder conditioned on a class label.

    The label is mapped to a learned embedding which is concatenated with the
    latent vector before the first linear layer; the rest of the network is
    inherited from ``Decoder``.

    Args:
        latent_dim: size of the latent vector.
        class_dim: size of the label embedding.
        num_classes: number of distinct labels the embedding table supports
            (defaults to 10, the value that used to be hard-coded).
    """

    def __init__(self, latent_dim, class_dim, num_classes=10):
        super().__init__(latent_dim)

        self.label_embedding = nn.Embedding(
            num_embeddings=num_classes, embedding_dim=class_dim
        )

        # Keep the output width of the parent's linear layer so the inherited
        # layers after it still fit.
        linear_out_features = self.linear.out_features

        # linear will take latent vector concatenated with label embedding
        self.linear = nn.Linear(
            in_features=latent_dim + class_dim,
            out_features=linear_out_features,
        )

    def forward(self, x, label, with_sigmoid=False):
        """Decode latent vectors ``x`` conditioned on integer ``label``.

        Args:
            x: latent vectors; concatenated with the label embedding along
                dim 1, so it must share its first dimension with ``label``.
            label: integer class indices, looked up in the embedding table.
            with_sigmoid: if True, apply a sigmoid to the output; otherwise
                the raw decoder output is returned.

        Returns:
            The output of the inherited ``self.decoder`` stack, optionally
            passed through a sigmoid.
        """
        label = self.label_embedding(label)
        x = torch.cat([x, label], dim=1)  # concatenate latent vector and label embedding
        x = self.linear(x)
        # Reshape the flat features into 256 channels of 2x2; presumably this
        # matches what the parent Decoder's stack expects — verify there.
        x = x.view(-1, 256, 2, 2)
        x = self.decoder(x)
        if with_sigmoid:
            x = F.sigmoid(x)
        return x


import lightning as L
import torch.nn.functional as F


class LitCVAE(LitVAE):
    """LitVAE variant whose decoder also receives the class label."""

    def training_step(self, batch, batch_idx):
        """Compute, log and return the weighted loss for one training batch."""
        images, targets = batch

        # encoder output -> (mu, log_var) -> reparametrized latent sample
        mu, log_var = self.vae_split(self.encoder(images))
        z = self.vae_reparametrize(mu, log_var)

        # the conditional decoder gets the class label next to the latent vector
        logits = self.decoder(z, targets)

        # weighted sum of the reconstruction term (BCE on logits) and the KL term
        recon_term = self.recon_weight * F.binary_cross_entropy_with_logits(
            logits, images
        )
        kld_term = self.kld_weight * self.kld_loss(mu, log_var)
        total = recon_term + kld_term

        self.log("train_loss", total)
        return total

    def test_step(self, batch, batch_idx):
        """Reconstruct one test batch and store the results on the module."""
        images, targets = batch

        # same data path as in training
        encoded = self.encoder(images)
        mu, log_var = self.vae_split(encoded)
        z = self.vae_reparametrize(mu, log_var)

        # sigmoid is applied here because the output is stored as an image
        reconstruction = self.decoder(z, targets, with_sigmoid=True)

        # note: the raw encoder output is stored, not the sampled vector
        self.update_test_result(images, reconstruction, encoded, targets)


# -p: create missing parents and do not fail if the directory already exists
# (plain mkdir errors with "File exists" when the notebook is re-run).
!mkdir -p lightning_logs/CVAE_latent_dim=2


# Seed first: the statements below are order-sensitive with respect to the RNG.
L.seed_everything(42)

latent_dim = 2
class_dim = 4  # size of the label embedding inside CDecoder
# The encoder is built with twice the latent size; its output is later split
# into two parts (mu, log_var) with vae_split.
encoder = VAEEncoder(latent_dim=latent_dim * 2)
decoder = CDecoder(latent_dim=latent_dim, class_dim=class_dim)

# No explicit loss weights: the defaults inherited from LitVAE apply
# (not visible in this cell).
autoencoder = LitCVAE(encoder, decoder)

logger = TensorBoardLogger(save_dir="lightning_logs", name="CVAE_latent_dim=2")
trainer = L.Trainer(max_epochs=5, logger=logger)

trainer.fit(autoencoder, train_loader)

INFO: Seed set to 42
INFO:lightning.fabric.utilities.seed:Seed set to 42
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name    | Type       | Params
---------------------------------------
0 | encoder | VAEEncoder | 392 K 
1 | decoder | CDecoder   | 395 K 
---------------------------------------
788 K     Trainable params
0         Non-trainable params
788 K     Total params
3.155     Total estimated model params size (MB)
INFO:lightning.pytorch.callbacks.model_summary:
  | Name    | Type       | Params
---------------------------------------
0 | encoder | VAEEncoder | 392 K 
1 | decoder | CDecoder   | 395 K 
---------------------------------------
788 K     Trainable params
0         Non-trainable params
788 K     Total params
3.155     Total estimated model params size (MB)

Training: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=5` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=5` reached.


# Run the test loop for the conditional model and read back the stored results.
trainer.test(autoencoder, test_loader)
run_res = autoencoder.test_result

# Show the first nine "real" entries together with their "recon" counterparts.
plot_samples(run_res["real"][0:9], run_res["recon"][0:9])

INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

Testing: |          | 0/? [00:00<?, ?it/s]


# NOTE(review): LitCVAE.test_step stores the raw encoder output, which has
# 2 * latent_dim = 4 values per point here — presumably plot_manifold only
# draws the first two columns; verify.
plot_manifold(run_res["latent"], run_res["labels"])


# Same plot restricted to the points whose label is 4.
plot_manifold(run_res["latent"][run_res["labels"] == 4])


# Same plot restricted to the points whose label is 9.
plot_manifold(run_res["latent"][run_res["labels"] == 9])


# Decode a regular steps x steps grid over [-2, 2] x [-2, 2] with the label
# fixed to 4.
steps = 20
space1 = torch.linspace(-2, 2, steps)
space2 = torch.linspace(-2, 2, steps)
grid = torch.cartesian_prod(space1, space2)
label = torch.full((len(grid),), 4)
with torch.no_grad():
    decoded = autoencoder.decoder(grid, label, with_sigmoid=True)
imgs = decoded.cpu().numpy().squeeze()

# Group the flat list of images into `steps` consecutive rows of `steps`
# images each and pass every row as a separate argument.
rows = imgs.reshape(steps, steps, *imgs.shape[1:])
plot_samples(*rows, single_size=0.35)


# Decode a regular steps x steps grid over [-2, 2] x [-2, 2] with the label
# fixed to 9.
steps = 20
space1 = torch.linspace(-2, 2, steps)
space2 = torch.linspace(-2, 2, steps)
grid = torch.cartesian_prod(space1, space2)
label = torch.full((len(grid),), 9)
with torch.no_grad():
    decoded = autoencoder.decoder(grid, label, with_sigmoid=True)
imgs = decoded.cpu().numpy().squeeze()

# Group the flat list of images into `steps` consecutive rows of `steps`
# images each and pass every row as a separate argument.
rows = imgs.reshape(steps, steps, *imgs.shape[1:])
plot_samples(*rows, single_size=0.35)


# Take one test batch and pick a single image labelled 2 from it; the
# length-1 slice keeps the batch axis.
imgs, labels = next(iter(test_loader))
real = imgs[labels == 2][4:5]


# Inverted-grayscale preview of the chosen image, resized to 256x256.
size = (256, 256)
Image.fromarray(np.uint8(255 - np.squeeze(real.numpy()) * 255)).resize(size)


# Fix the seed so the latent draws below are repeatable.
torch.manual_seed(42)

sample_size = 10

# Encode the single image, then draw sample_size latent vectors around its
# mean: z ~ N(0, I), latent = z * sigma + mu with sigma = exp(0.5 * log_var).
mu, log_var = autoencoder.vae_split(autoencoder.encoder(real))
sigma = torch.exp(0.5 * log_var)
z = torch.randn(sample_size, mu.shape[1])
latent = z * sigma + mu

# One label per latent vector: 0..9.
# NOTE(review): the 10 here is independent of sample_size; both must stay equal
# because CDecoder.forward concatenates latent and label embedding along dim 1.
label = torch.arange(10)


# Decode every (latent, label) pair without tracking gradients.
with torch.no_grad():
    imgs = autoencoder.decoder(latent, label, with_sigmoid=True)
    imgs = np.squeeze(imgs.cpu().numpy())


plot_samples(imgs)

Representation learning

Глубокие нейронные сети как модели обучения представлений

Понижение размерности и гипотеза о многообразии

Metric learning

Формирование векторов признаков

Сиамская сеть

Triplet Loss

Реализация сиамской сети

Загрузка данных

Датасет для работы с Triplet Loss

Создание модели

Обучение

Проверка

Автоэнкодеры (AE)

Архитектура автоэнкодера

Очищение изображения от шумов

Добавление шума к исходной выборке

Реализация автоэнкодера

Архитектура

Код обучения

Инференс модели

Обнаружение аномалий

Предобучение на неразмеченных данных

Автоэнкодер как генератор и его ограничения

Плавная интерполяция

Ограничения декодера как генератора

Вариационные автоэнкодеры (VAE)

Сэмплирование в латентном пространстве

Регуляризация латентного пространства

Реализация VAE

Демонстрация эффекта регуляризации

Совмещение ошибки восстановления и KL-дивергенции

Плавная интерполяция

Векторная арифметика

Ограничения VAE

Условные вариационные автоэнкодеры (CVAE)

Реализация CVAE

Генерация заданных цифр из латентного распределения

Генерация заданных цифр с переносом стиля

Разделение стиля и метки