from torch import nn


class FCNet(nn.Module):
    """Two-layer fully connected network: 3*32*32 inputs -> 64 (ReLU) -> 10 outputs.

    The layers live in ``self.layers_stack`` (an ``nn.Sequential``) at
    positions 0 (linear), 1 (ReLU) and 2 (linear). ``forward`` applies the
    stack as-is, so the input's last dimension must already be 3*32*32.
    """

    def __init__(self):
        super().__init__()
        in_features = 3 * 32 * 32
        hidden = nn.Linear(in_features, 64)
        activation = nn.ReLU()
        head = nn.Linear(64, 10)
        # Positional order matters: it fixes the indices used in the
        # state-dict keys and in ``layers_stack[i]`` lookups.
        self.layers_stack = nn.Sequential(hidden, activation, head)

    def forward(self, x):
        logits = self.layers_stack(x)
        return logits


!wget -q "https://edunet.kea.su/repo/EduNet-web_dependencies/weights/2layer.pt"


import torch

fc_model = FCNet()
# map_location="cpu": fc_model is created on the CPU, and without this a
# checkpoint that was saved from a GPU cannot be loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles the file - only load checkpoints that
# come from a trusted source.
weights_in_dict = torch.load("2layer.pt", map_location="cpu")
fc_model.load_state_dict(weights_in_dict)

<All keys matched successfully>


import matplotlib.pyplot as plt
from torchvision import utils


W1 = fc_model.layers_stack[0].weight.reshape(64, 3, 32, 32)  # layer has 64 neurons
img_grid = utils.make_grid(W1, pad_value=1, normalize=True, nrow=16)

plt.figure(figsize=(20, 12))
plt.title("Weights visualization in 2D")
plt.imshow(img_grid.permute(1, 2, 0).cpu().numpy())  # CHW -> HWC
plt.axis("off")
plt.show()


!wget -q "https://edunet.kea.su/repo/EduNet-web_dependencies/dev-2.0/L06/digit.png"


import numpy as np
from PIL import Image


image = Image.open("digit.png")
img_np = np.array(image)
plt.imshow(img_np, cmap="gray")
plt.show()


print(img_np)

[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0  12  15  20  12   1   0   0   0]
 [  0   0   0   0   0   0   3 123 225 242 255 225 137   7   0   0]
 [  0   0   0   0   0  15 207 251 103 109 107 105 171 145   0   0]
 [  0   0   0   0   0 149 236  80   0   0   0   0   4  21   0   0]
 [  0   0   0   0  16 228 106   0  17  33  17   0   0   0   0   0]
 [  0   0   0   0  97 248  98 138 232 255 216  22   0   0   0   0]
 [  0   0   0   0  50 247 255 197 111 123 252 129   0   0   0   0]
 [  0   0   0   0   1  54  55   9   0  49 250 116   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0 158 240  44   0   0   0   0]
 [  0   0   0   0   0   0   0   0  88 255 125   0   0   0   0   0]
 [  0   0   0   0   7  15  23 132 255 188  12   0   0   0   0   0]
 [  0   0   0   0  20 200 229 240 157  28   0   0   0   0   0   0]
 [  0   0   0   0   3  64  73  45   0   0   0   0   0   0   0   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0]]


fig, ax = plt.subplots(ncols=2, figsize=(10, 4))
ax[0].imshow(img_np, cmap="gray")
ax[1].imshow(img_np.reshape(1, -1), aspect=20, cmap="gray")
ax[0].set_title("Original image")
ax[1].set_title("Flattened image")

vector = np.array(image).flatten()
print(list(vector))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 15, 20, 12, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 123, 225, 242, 255, 225, 137, 7, 0, 0, 0, 0, 0, 0, 0, 15, 207, 251, 103, 109, 107, 105, 171, 145, 0, 0, 0, 0, 0, 0, 0, 149, 236, 80, 0, 0, 0, 0, 4, 21, 0, 0, 0, 0, 0, 0, 16, 228, 106, 0, 17, 33, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 248, 98, 138, 232, 255, 216, 22, 0, 0, 0, 0, 0, 0, 0, 0, 50, 247, 255, 197, 111, 123, 252, 129, 0, 0, 0, 0, 0, 0, 0, 0, 1, 54, 55, 9, 0, 49, 250, 116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 158, 240, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88, 255, 125, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 15, 23, 132, 255, 188, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 200, 229, 240, 157, 28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 64, 73, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


from skimage import data, color
from skimage.transform import rescale

# fmt: off

box_blur_kernel = 1/9 * np.array([[1, 1, 1],
                         [1, 1, 1],
                         [1, 1, 1]])
# fmt: on


def apply_filter(img, kernel):
    """Slide ``kernel`` over ``img`` and return the map of weighted sums.

    No padding is added and the window moves one pixel at a time, so the
    result has shape ``(h - kh + 1, w - kw + 1)``.

    Args:
        img: 2-D image; anything ``np.asarray`` can convert (ndarray,
            nested list, CPU tensor).
        kernel: 2-D filter weights; same accepted types as ``img``.

    Returns:
        2-D float ``np.ndarray`` with one value per window position.
    """
    # Normalise both inputs to ndarrays so the arithmetic below always runs
    # in numpy, whatever array-like type the caller passed in.
    img = np.asarray(img)
    kernel = np.asarray(kernel)

    h, w = img.shape  # image height and width
    kh, kw = kernel.shape  # kernel height and width
    out_h, out_w = h - kh + 1, w - kw + 1  # number of valid window positions

    out = np.zeros((out_h, out_w))
    for i in range(out_h):
        for j in range(out_w):
            # window of the image currently covered by the kernel
            patch = img[i : i + kh, j : j + kw]
            # element-wise product with the kernel weights, then sum
            out[i, j] = np.multiply(patch, kernel).sum()
    return out


def show(img, result):
    """Display an image and its filtered version side by side in grayscale.

    Args:
        img: 2-D array drawn in the left panel.
        result: 2-D array drawn in the right panel.
    """
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    axes[0].imshow(img, cmap="gray")
    # Plot and describe the arguments that were passed in, not module-level
    # variables that merely happen to hold the same data at call time.
    axes[1].imshow(result, cmap="gray")
    axes[0].set(title=f"Original image, shape: {img.shape}")
    axes[1].set(title=f"Blurred image: {result.shape}")
    axes[0].axis("off")
    axes[1].axis("off")
    plt.show()


img_cat = color.rgb2gray(data.cat())
img_cat_resc = rescale(img_cat, 0.25, anti_aliasing=False) * 255

out = apply_filter(img_cat_resc, box_blur_kernel)
show(img_cat_resc, out)


# fmt: off
# Gaussian 3x3 kernel, sum of weights == 1
gauss_kernel = np.array([[1/16, 1/8, 1/16],
                   [1/8,  1/4, 1/8 ],
                   [1/16, 1/8, 1/16]])
# fmt: on

out = apply_filter(img_cat_resc, gauss_kernel)
show(img_cat_resc, out)


# fmt: off
# Built with numpy like the other kernels: apply_filter does its arithmetic
# with numpy, so a torch tensor here would mix the two libraries per patch.
sobel_y_kernel = np.array([[ 1.0,  2.0,  1.0],
                           [ 0.0,  0.0,  0.0],
                           [-1.0, -2.0, -1.0]])
# fmt: on

x_edges = apply_filter(img_cat * 255, sobel_y_kernel)

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
axes[0].imshow(img_cat, cmap="gray")
axes[1].imshow(x_edges, cmap="gray", vmin=0, vmax=255)
axes[0].set(title=f"Original image, shape: {img_cat.shape}")
axes[1].set(title=f"Horizontal edges detector: {x_edges.shape}")
axes[0].axis("off")
axes[1].axis("off")
plt.show()


# fmt: off
cross = np.array([[0, 0 ,0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 1, 0],
                  [0, 0, 1, 1, 1],
                  [0, 0, 0, 1, 0]])
# fmt: on

plt.subplot(1, 2, 1)
plt.imshow(cross, cmap="gray")
plt.axis("off")
plt.show()


# fmt: off

kernel_cs = np.array([[-1, 1, -1],
                      [ 1, 1,  1],
                      [-1, 1, -1]])
# fmt: on


features = apply_filter(cross, kernel_cs)

plt.subplot(1, 2, 1)
plt.imshow(cross, cmap="gray")
plt.title("Image")

plt.subplot(1, 2, 2)
plt.xlim([0, 5])
plt.ylim([0, 5])
plt.imshow(features, extent=(1, 4, 1, 4))
plt.title("Features")
plt.colorbar(fraction=0.046, pad=0.04)
plt.show()
print("Features:\n", features)

Features:
 [[ 0. -1.  1.]
 [-1.  1.  0.]
 [ 1.  0.  5.]]


local_linear = nn.Linear(9, 1, bias=False)  # 9 = 3 * 3 (weights shape: (3,3))

local_linear.weight.data[0] = torch.tensor(kernel_cs).flatten()  # Bad practice
cross_in_tensor = torch.tensor(cross).float()
result = torch.zeros((3, 3))
for i in range(0, result.shape[0]):
    for j in range(0, result.shape[1]):
        segment = cross_in_tensor[i : i + 3, j : j + 3].flatten()
        result[i, j] = local_linear(segment)

plt.subplot(1, 2, 1)
plt.imshow(cross, cmap="gray")
plt.title("Image")

plt.subplot(1, 2, 2)
plt.xlim([0, 5])
plt.ylim([0, 5])
plt.imshow(result.detach(), extent=(1, 4, 1, 4))

plt.title("Result")
plt.colorbar(fraction=0.046, pad=0.04)
plt.show()
print("Result:\n", result.detach())

Result:
 tensor([[ 0., -1.,  1.],
        [-1.,  1.,  0.],
        [ 1.,  0.,  5.]])


import torch
import numpy as np

# fmt: off
cross = np.array([[0, 0 ,0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 1, 0],
                  [0, 0, 1, 1, 1],
                  [0, 0, 0, 1, 0]])

kernel_cs = np.array([[-1, 1, -1],
                      [ 1, 1,  1],
                      [-1, 1, -1]])
# fmt: on

cross_in_tensor = torch.tensor(cross).float()


from torch.nn import Conv2d

conv = Conv2d(
    in_channels=1,  # what's this ?
    out_channels=1,  # what's this ?
    kernel_size=(3, 3),  # kernel.shape == 3x3
    bias=False,
)
# conv2d accepts input of shape BxCxHxW
feature_map = conv(
    cross_in_tensor.unsqueeze(0).unsqueeze(0)
)  # add batch and channel dim
print(feature_map)

tensor([[[[ 0.0000, -0.2520, -0.3146],
          [-0.2520, -0.4728, -0.2722],
          [ 0.0938,  0.1015,  0.1447]]]], grad_fn=<ConvolutionBackward0>)


# data[0] because filter can have multiple kernels. see next chapter
conv.weight.data[0] = torch.tensor(kernel_cs)  # replace original kernel

feature_map = conv(
    cross_in_tensor.unsqueeze(0).unsqueeze(0)
)  # add batch and channel dim
print("Feature map for cross\n", feature_map)

Feature map for cross
 tensor([[[[ 0., -1.,  1.],
          [-1.,  1.,  0.],
          [ 1.,  0.,  5.]]]], grad_fn=<ConvolutionBackward0>)


conv_ch1 = Conv2d(in_channels=1, out_channels=1, kernel_size=5)
print("One channel kernel \t", conv_ch1.weight.shape)
conv_ch3 = Conv2d(in_channels=3, out_channels=1, kernel_size=5)
print("Three channel kernel \t", conv_ch3.weight.shape)

One channel kernel 	 torch.Size([1, 1, 5, 5])
Three channel kernel 	 torch.Size([1, 3, 5, 5])


!wget -q https://edunet.kea.su/repo/EduNet-web_dependencies/dev-2.0/L06/cat.jpg


from PIL import Image

cat_in_pil = Image.open("cat.jpg")
display(cat_in_pil)


cat_in_np = np.array(cat_in_pil)  # pillow -> numpy
cat_in_float = cat_in_np.astype(np.float32) / 255  # int->float
cat_in_tensor = torch.tensor(cat_in_float)  # np -> torch

try:
    conv_ch3(cat_in_tensor.unsqueeze(0))  # add batch dimension
except Exception as e:
    print("Error: \n", e)

Error: 
 Given groups=1, weight of size [1, 3, 5, 5], expected input[1, 192, 192, 3] to have 3 channels, but got 192 channels instead


print("Original \t", cat_in_tensor.shape, "HWC")
cat_in_tensor_channel_first = cat_in_tensor.permute(2, 0, 1)  # HWC -> CHW
print("Torch style \t", cat_in_tensor_channel_first.shape, "CHW")

Original 	 torch.Size([192, 192, 3]) HWC
Torch style 	 torch.Size([3, 192, 192]) CHW


one_image_batch = cat_in_tensor_channel_first.unsqueeze(0)
out = conv_ch3(one_image_batch)
print("No error!")

No error!


from torchvision.transforms.functional import to_tensor

# Convert the PIL image with torchvision's helper and compare the result with
# the tensor that was built by hand above.
cat_in_tensor2 = to_tensor(cat_in_pil)
print(cat_in_tensor2.shape)

print(
    "Tensor almost equal: ",
    torch.allclose(cat_in_tensor_channel_first, cat_in_tensor2),  # float comparison with tolerance
)

torch.Size([3, 192, 192])
Tensor almost equal:  True


print("Output feature map size:", out.shape)  # first dim is batch

Output feature map size: torch.Size([1, 1, 188, 188])


print("Kernels", conv_ch3.weight.shape)
print("Biases", conv_ch3.bias.shape)

Kernels torch.Size([1, 3, 5, 5])
Biases torch.Size([1])


conv35 = Conv2d(in_channels=3, out_channels=5, kernel_size=3)
out = conv35(cat_in_tensor_channel_first)

print(f"weights shape: {conv35.weight.shape}")  # 5 filters 3x3x3
print(f"weights shape: {conv35.bias.shape}")  # one bias per filter

weights shape: torch.Size([5, 3, 3, 3])
weights shape: torch.Size([5])


print(f"result shape: {out.shape}")  # 5 feature map

result shape: torch.Size([5, 190, 190])


from torch.nn.functional import relu

# Two stacked convolutions: the second layer's in_channels must equal the
# first layer's out_channels.
conv_1 = torch.nn.Conv2d(
    in_channels=3,  # Number of input channels (3 for RGB images)
    out_channels=6,  # Number of filters/output channels
    kernel_size=5,
)

conv_2 = torch.nn.Conv2d(
    in_channels=6,  # Number of input channels (= out_channels of conv_1)
    out_channels=10,  # Number of filters/output channels
    kernel_size=5,
)

img = torch.randn((1, 3, 32, 32))  # 1-batch size, 3-num of channels, (32,32)-img size
print(f"img shape: {img.shape}")

out_1 = conv_1(img)
print(f"out_1 shape: {out_1.shape}")  # [1, 6, 28, 28]

out_2 = conv_2(relu(out_1))
print(f"out_2 shape: {out_2.shape}")  # [1, 10, 24, 24]

img shape: torch.Size([1, 3, 32, 32])
out_1 shape: torch.Size([1, 6, 28, 28])
out_2 shape: torch.Size([1, 10, 24, 24])


img = torch.randn((1, 1, 5, 5))  # create random image BCHW
print(f"Original tensor:\nshape:{img.shape}")
conv_3 = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3)
conved_3 = conv_3(img)
print("Shape after convolution layer(kernel 3x3):", conved_3.shape)

Original tensor:
shape:torch.Size([1, 1, 5, 5])
Shape after convolution layer(kernel 3x3): torch.Size([1, 1, 3, 3])


# Pad the image with zeros manually
padded_img = torch.zeros((1, 1, 7, 7))  # zero canvas, one pixel larger on every side
padded_img[:, :, 1:-1, 1:-1] += img  # place the image in the center: it is now surrounded by zeros
print(f"\nPadded tensor:\nshape:{padded_img.shape}:\n {padded_img}")

# Run the same 3x3 convolution on the padded image and report the shapes
conved_pad_3 = conv_3(padded_img)
print("\n\nPadded shape:", padded_img.shape)
print("Shape after convolution with padding(kernel 3x3):", conved_pad_3.shape)

Padded tensor:
shape:torch.Size([1, 1, 7, 7]):
 tensor([[[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
          [ 0.0000, -0.7762, -0.8809,  2.9101,  1.1978, -1.0568,  0.0000],
          [ 0.0000, -0.4706,  0.0981,  0.8564,  1.5882, -0.2351,  0.0000],
          [ 0.0000,  0.2247,  0.8475,  0.0281,  1.2842,  0.2463,  0.0000],
          [ 0.0000,  0.5440, -0.5140,  0.0858,  0.3820,  0.1245,  0.0000],
          [ 0.0000,  0.1286,  0.0221, -1.2747, -0.1029,  0.4962,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]]])


Padded shape: torch.Size([1, 1, 7, 7])
Shape after convolution with padding(kernel 3x3): torch.Size([1, 1, 5, 5])


conv_5 = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=5)

conved_pad_5 = conv_5(padded_img)

print("Original shape:", img.shape)
print("Shape after convolution with padding(kernel 5x5):", conved_pad_5.shape)

Original shape: torch.Size([1, 1, 5, 5])
Shape after convolution with padding(kernel 5x5): torch.Size([1, 1, 3, 3])


# conv layer without padding (padding=0 by default)
conv_3 = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=0)

# conv layer with padding = 1 (add zeros)
conv_3_padded = torch.nn.Conv2d(
    in_channels=1, out_channels=1, kernel_size=3, padding=1
)  # Padding added 1 zeros line to all four sides of the input
original = conv_3(padded_img)
padded = conv_3_padded(img)

print(f"Explicitly padded:\n{original.shape}")
print(f"\nImplicitly padded:\n{padded.shape}")

Explicitly padded:
torch.Size([1, 1, 5, 5])

Implicitly padded:
torch.Size([1, 1, 5, 5])


import torch
from torch import nn

batch_size = 1
input = torch.randn((batch_size, 1, 28, 28))

model = torch.nn.Sequential(
    nn.Conv2d(
        in_channels=1, out_channels=3, kernel_size=5
    ),  # after conv shape: [batch_size,3,24,24]
    nn.ReLU(),  # Activation doesn't depend on input shape
    nn.Conv2d(
        in_channels=3, out_channels=6, kernel_size=3
    ),  # after conv shape: [batch_size,6,22,22]
    nn.ReLU(),
    nn.Flatten(),  # 6*22*22=2904
    nn.Linear(2904, 100),
    nn.ReLU(),  # Activation doesn't depend on input shape
    nn.Linear(100, 10),  # 10 classes, like a cifar10
)

out = model(input)
print(f"out shape: {out.shape}")

out shape: torch.Size([1, 10])


input = torch.randn((16, 3, 32, 32))

batch_size = input.shape[0]

print("class Flatten\t", nn.Flatten()(input).shape)
print(
    "view \t\t", input.view(batch_size, -1).shape
)  # data stay in same place in memory
print("reshape \t", input.reshape(batch_size, -1).shape)  # data may be moved
print("method flatten \t", input.flatten(1).shape)

class Flatten	 torch.Size([16, 3072])
view 		 torch.Size([16, 3072])
reshape 	 torch.Size([16, 3072])
method flatten 	 torch.Size([16, 3072])


dummy_input = torch.randn(1, 1, 5, 5)
conv_s1 = nn.Conv2d(in_channels=1, out_channels=3, kernel_size=3, stride=(1, 1))
conv_s2 = nn.Conv2d(1, 3, 3, stride=2)  # bypass par. names, stride = (2, 2)

out_stride1 = conv_s1(dummy_input)
out_stride2 = conv_s2(dummy_input)

print("Out with stride 1", out_stride1.shape)
print("Out with stride 2", out_stride2.shape)

Out with stride 1 torch.Size([1, 3, 3, 3])
Out with stride 2 torch.Size([1, 3, 2, 2])


# Create torch tensor 7x7
# fmt: off
input = torch.tensor([[[[1, 1, 1, 1, 1, 1, 99],
                        [1, 1, 1, 1, 1, 1, 99],
                        [1, 1, 1, 1, 1, 1, 99],
                        [1, 1, 1, 1, 1, 1, 99],
                        [1, 1, 1, 1, 1, 1, 99],
                        [1, 1, 1, 1, 1, 1, 99],
                        [1, 1, 1, 1, 1, 1, 99]]]], dtype=torch.float)
# fmt: on

print(f"input shape: {input.shape}")

conv = torch.nn.Conv2d(
    in_channels=1,  # Number of channels
    out_channels=1,  # Number of filters
    kernel_size=3,
    stride=3,
    bias=False,  # Don't use bias
)
conv.weight = torch.nn.Parameter(
    torch.ones((1, 1, 3, 3))
)  # Replace random weights to ones
out = conv(input)

print(f"out shape: {out.shape}")
print(f"out:\n{out}")

input shape: torch.Size([1, 1, 7, 7])
out shape: torch.Size([1, 1, 2, 2])
out:
tensor([[[[9., 9.],
          [9., 9.]]]], grad_fn=<ConvolutionBackward0>)


# create tensor 4x4
# fmt: off
input = torch.tensor([[[[1, 1, 2, 4],
                        [5, 6, 7, 8],
                        [3, 2, 1, 0],
                        [1, 2, 3, 4]]]], dtype=torch.float)
# fmt: on

max_pool = torch.nn.MaxPool2d(kernel_size=2, stride=2)
avg_pool = torch.nn.AvgPool2d(kernel_size=2, stride=2)

print("Input:\n", input)
print("Max pooling:\n", max_pool(input))
print("Average pooling:\n", avg_pool(input))

Input:
 tensor([[[[1., 1., 2., 4.],
          [5., 6., 7., 8.],
          [3., 2., 1., 0.],
          [1., 2., 3., 4.]]]])
Max pooling:
 tensor([[[[6., 8.],
          [3., 4.]]]])
Average pooling:
 tensor([[[[3.2500, 5.2500],
          [2.0000, 2.0000]]]])


conv = torch.nn.Conv2d(
    in_channels=64,  # Number of input channels
    out_channels=32,  # Number of filters
    kernel_size=1,
)

input = torch.randn((1, 64, 56, 56))
out = conv(input)

print("Input shape:", input.shape)
print("Shape after 1x1 conv:", out.shape)  # [1, 32, 56, 56] batch, C_out, H_out, W_out

Input shape: torch.Size([1, 64, 56, 56])
Shape after 1x1 conv: torch.Size([1, 32, 56, 56])


from torch.nn import Conv2d


def get_params_count(module):
    """Print the shape of every parameter of ``module`` and the total weight count.

    Args:
        module: an ``nn.Module``; all tensors yielded by
            ``module.parameters()`` (kernels and biases) are counted.

    Prints one line per parameter tensor, then ``Total weights <int>``.
    A module without parameters reports a total of 0.
    """
    weights_count = 0
    # Get all model weights: kernels + biases
    for p in module.parameters():
        print(p.shape)
        # numel() is the number of scalars in the tensor, returned as a plain
        # int, so the running total stays an int even when the loop is empty.
        weights_count += p.numel()
    print("Total weights", weights_count)


conv = Conv2d(3, 6, 3, bias=True)
get_params_count(conv)

torch.Size([6, 3, 3, 3])
torch.Size([6])
Total weights 168


from torch.nn import Linear

linear = Linear(3072, 6, bias=True)
get_params_count(linear)

torch.Size([6, 3072])
torch.Size([6])
Total weights 18438


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Conv2d(in_channels=1, out_channels=6, kernel_size=3)
model.to(device)  # send model to device

dummy_input = torch.randn(1, 1, 5, 5)
out = model(dummy_input.to(device))  # send data to GPU too!
# ... do backprop if need
out = out.cpu()  # move data back to main memory


import torchaudio

dataset = torchaudio.datasets.YESNO("./", download=True)

100%|██████████| 4.49M/4.49M [00:00<00:00, 38.1MB/s]


import matplotlib.pyplot as plt

plt.figure(figsize=(8, 5))
waveform, sample_rate, label = dataset[0]
plt.plot(waveform.flatten())
plt.show()


from torch import nn

conv = nn.Conv1d(1, 16, 3, stride=2)
output = conv(waveform)
print(output.shape)

torch.Size([16, 25399])


import librosa
from torchaudio.transforms import Spectrogram

spec_obj = Spectrogram(power=2, center=True, pad_mode="reflect")
spec = spec_obj(waveform[0])

plt.figure(figsize=(7, 5))
plt.imshow(librosa.power_to_db(spec))
plt.title("Spectrogram")
plt.xlabel("time")
plt.ylabel("freq")
plt.xticks([], [])
plt.yticks([], [])
plt.show()


import torch

# With cubic kernels and same stride
conv = nn.Conv3d(in_channels=16, out_channels=33, kernel_size=3, stride=2)

# non-square kernels with unequal stride and padding
conv = nn.Conv3d(
    in_channels=16,
    out_channels=33,
    kernel_size=(3, 5, 2),
    stride=(2, 1, 1),
    padding=(4, 2, 0),
)

input = torch.randn(20, 16, 10, 50, 100)
out = conv(input)

print("out shape: ", out.shape)

out shape:  torch.Size([20, 33, 8, 50, 99])


# !wget -q https://nipy.org/nibabel/_downloads/f76cc5a46e5368e2c779868abc49e497/someones_epi.nii.gz
!wget -q https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/someones_epi.nii.gz


import nibabel as nib

epi_img = nib.load("someones_epi.nii.gz")
epi_img_data = epi_img.get_fdata()
print(epi_img_data.shape)
print("Max", epi_img_data.max(), "Min", epi_img_data.min())

(53, 61, 33)
Max 103.76662158966064 Min 7.742551803588867


import numpy as np


def show_slices(ax, data):
    """Draw evenly spaced slices of ``data`` (taken along axis 0), one per axes.

    Args:
        ax: sequence of matplotlib axes; one slice is drawn on each of them.
        data: array-like indexed along its first axis to obtain 2-D slices.
    """
    # Take as many slices as there are axes, so the function follows the
    # caller's subplot layout instead of assuming a fixed number of panels.
    slices = np.linspace(0, len(data) - 1, num=len(ax)).astype(int)
    for panel, sl in zip(ax, slices):
        panel.axis("off")
        panel.imshow(data[sl], cmap="gray", origin="lower")

fig, axes = plt.subplots(3, 10, figsize=(16, 4))

show_slices(axes[0], epi_img_data)
show_slices(axes[1], np.moveaxis(epi_img_data, 0, 1))
show_slices(axes[2], np.moveaxis(epi_img_data, 0, 2))
plt.show()


import torch

brain_mrt = torch.Tensor(epi_img_data)
brain_mrt = brain_mrt.unsqueeze(0)  # add channel dim
print("Add channel dim", brain_mrt.shape)

Add channel dim torch.Size([1, 53, 61, 33])


from torch import nn

conv3d = nn.Conv3d(
    in_channels=1,
    out_channels=16,
    kernel_size=(3, 3, 3),
    stride=(1, 1, 1),
    padding=(1, 1, 1),
)

out = conv3d(brain_mrt.unsqueeze(0))  # add batch dim and run inference

print("out shape: ", out.shape)

out shape:  torch.Size([1, 16, 53, 61, 33])


!pip install -q torch_geometric

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 12.9 MB/s eta 0:00:00


import torch

x = torch.tensor([[0], [1], [2], [3], [4], [5]], dtype=torch.float)


# fmt: off
edge_index = torch.tensor([[0, 1,  2, 2, 3, 4, 4],
                           [1, 2,  3, 4, 5, 2, 5]], dtype=torch.long)
# fmt: on


from torch_geometric.data import Data

data = Data(x=x, edge_index=edge_index)
data.validate(raise_on_error=True)  # optional check

True


import networkx as nx
from torch_geometric.utils import to_networkx

nx.draw(to_networkx(data, to_undirected=False))


import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.utils import to_networkx
from networkx.drawing.layout import kamada_kawai_layout


def show_graph(graph, colors=None, embeddings=False):
    """Draw a torch_geometric graph as a directed networkx plot.

    Args:
        graph: object accepted by ``to_networkx``; ``graph.x`` is read when
            ``embeddings`` is true.
        colors: passed through as ``node_color`` to ``nx.draw_networkx``.
        embeddings: when true, every node is relabelled as
            ``"<index>: [v1,v2,...]"`` using the values of its row in
            ``graph.x`` (one decimal place) and a smaller font is used.
    """
    nx_graph = to_networkx(graph, to_undirected=False)
    font_size = 14

    if embeddings:
        labels = {}
        for node_id, emb in enumerate(graph.x):
            formatted = ",".join(f"{value.item():0.1f}" for value in emb)
            labels[node_id] = f"{node_id}: [{formatted}]"
        nx_graph = nx.relabel_nodes(nx_graph, labels)
        font_size = 10  # labels are longer now, so shrink the font

    plt.axis("off")

    # The layout is computed on the (possibly relabelled) graph that is drawn.
    layout = kamada_kawai_layout(nx_graph, dim=2, scale=1, center=None)
    nx.draw_networkx(
        nx_graph,
        pos=layout,
        with_labels=True,
        node_size=800,
        node_color=colors,
        edge_color="grey",
        font_size=font_size,
    )


plt.figure(figsize=(30, 4))
plt.subplot(1, 6, 1).set_title("GC Node0")
show_graph(data, ["green", "gray", "gray", "gray", "gray", "gray"])
plt.subplot(1, 6, 2).set_title("GC Node1")
show_graph(data, ["lightgreen", "green", "gray", "gray", "gray", "gray"])
plt.subplot(1, 6, 3).set_title("GC Node2")
show_graph(data, ["gray", "lightgreen", "green", "gray", "lightgreen", "gray"])
plt.subplot(1, 6, 4).set_title("GC Node3")
show_graph(data, ["gray", "gray", "lightgreen", "green", "gray", "gray"])
plt.subplot(1, 6, 5).set_title("GC Node4")
show_graph(data, ["gray", "gray", "lightgreen", "gray", "green", "gray"])
plt.subplot(1, 6, 6).set_title("GC Node5")
show_graph(data, ["gray", "gray", "gray", "lightgreen", "lightgreen", "green"])


from torch_geometric.nn import GCNConv

gcn = GCNConv(in_channels=1, out_channels=3)
print(gcn)

GCNConv(1, 3)


for name, p in gcn.named_parameters():
    print(name, p.shape)

bias torch.Size([3])
lin.weight torch.Size([3, 1])


from torch.nn.functional import one_hot

embeddings = one_hot(x.flatten().long()).float()
data = Data(x=embeddings, edge_index=edge_index)
data.validate(raise_on_error=True)

show_graph(data, embeddings=True)


print(embeddings)

tensor([[1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1.]])


gcn = GCNConv(len(x), 1, add_self_loops=False, bias=False, normalize=False)
gcn.lin.weight.data = torch.ones((1, len(x)))
print(gcn, " weights ", gcn.lin.weight)

GCNConv(6, 1)  weights  Parameter containing:
tensor([[1., 1., 1., 1., 1., 1.]], requires_grad=True)


out = gcn(embeddings, edge_index)
print(out)  # Embedding (dim=1) for every node from 0 ... 5

tensor([[0.],
        [1.],
        [2.],
        [1.],
        [1.],
        [2.]], grad_fn=<ScatterAddBackward0>)


y = embeddings @ gcn.lin.weight.data.T
print(y)

tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]])


show_graph(Data(x=y, edge_index=edge_index), embeddings=True)


# Reproduce the layer output by hand: each node's value is the sum of the
# transformed features y of the nodes that have an edge pointing to it.
oc = torch.zeros_like(y)  # out custom

oc[0] = 0  # no neighbors
oc[1] = y[0]  # one neighbor (#0)
oc[2] = y[1] + y[4]  # two neighbors (#1 and #4)
oc[3] = y[2]  # one neighbor (#2)
oc[4] = y[2]  # one neighbor (#2)
oc[5] = y[3] + y[4]  # two neighbors (#3 and #4)

print(oc)

# the manual sums must match what the layer produced
assert torch.allclose(out, oc)

tensor([[0.],
        [1.],
        [2.],
        [1.],
        [1.],
        [2.]])


show_graph(Data(x=oc, edge_index=edge_index), embeddings=True)


gcn = GCNConv(len(x), 1, add_self_loops=True, bias=False, normalize=True)
gcn.lin.weight.data = torch.ones((1, len(x)))
out = gcn(embeddings, edge_index)
print(out)

tensor([[1.0000],
        [1.2071],
        [1.1498],
        [0.9082],
        [0.9082],
        [1.1498]], grad_fn=<ScatterAddBackward0>)


show_graph(Data(x=out, edge_index=edge_index), embeddings=True)


from math import sqrt


def get_neighbors(n):
    """Return ``[n]`` followed by the source node of every edge that ends at ``n``.

    Reads the module-level ``edge_index``: row 1 is matched against ``n`` and
    the corresponding entries of row 0 are collected as plain Python ints.
    """
    sources, targets = edge_index[0], edge_index[1]
    incoming = [
        sources[pos].item() for pos, target in enumerate(targets) if target == n
    ]
    # the node itself always comes first in the list
    return [n] + incoming


# Recompute the normalized aggregation by hand and compare it with the
# output of the GCNConv layer.
out_norm = torch.zeros_like(out)  # accumulates the final sum for every node
for i, e in enumerate(y):
    neighbors = get_neighbors(i)  # node i itself plus the sources of its incoming edges
    deg_i = len(neighbors)  # neighbors count of node i
    for node_num in neighbors:
        deg_j = len(
            get_neighbors(node_num)
        )  # neighbors count of j-th neighbor of node i
        out_norm[i] += y[node_num] / (
            sqrt(deg_i) * sqrt(deg_j)
        )  # Implementation of equation (3)
print(out_norm)
assert torch.allclose(out, out_norm)  # check that the results of GCNConv are the same

tensor([[1.0000],
        [1.2071],
        [1.1498],
        [0.9082],
        [0.9082],
        [1.1498]])


print(edge_index)

tensor([[0, 1, 2, 2, 3, 4, 4],
        [1, 2, 3, 4, 5, 2, 5]])


edge_weight = torch.Tensor([1, 1, 1, 1, 1, 1, 20])  # increase 4->5 edge weight

out = gcn(embeddings, edge_index, edge_weight)
print(["{0:0.2f}".format(i.item()) for i in out])
show_graph(Data(x=out, edge_index=edge_index), embeddings=True)

['1.00', '1.21', '1.15', '0.91', '0.91', '3.21']


gcn = GCNConv(len(x), 8, add_self_loops=True, bias=False, normalize=True)
out = gcn(embeddings, edge_index)


show_graph(Data(x=out, edge_index=edge_index), embeddings=True)


from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    """Two stacked graph convolutions: 6 -> 8 -> 16 features per node."""

    def __init__(self):
        super().__init__()
        self.gcn1 = GCNConv(in_channels=6, out_channels=8)
        self.gcn2 = GCNConv(in_channels=8, out_channels=16)

    def forward(self, x, edge_index, batch_index=None):
        """Apply gcn1, ReLU, then gcn2; ``batch_index`` is accepted but not used."""
        hidden = self.gcn1(x, edge_index)
        hidden = hidden.relu()
        node_features = self.gcn2(hidden, edge_index)
        return node_features


model = GCN()
print(model)

GCN(
  (gcn1): GCNConv(6, 8)
  (gcn2): GCNConv(8, 16)
)


out = model(embeddings, edge_index)
print(out.shape)
# show_graph(Data(x = out, edge_index=edge_index), embeddings =True)

torch.Size([6, 16])


# Three panels showing which nodes (light green) feed into node 3 (green);
# each panel highlights a larger set of nodes than the previous one.
plt.figure(figsize=(15, 4))
plt.subplot(1, 3, 1).set_title("GC Node3 Layer 0")
show_graph(data, ["gray", "gray", "lightgreen", "green", "gray", "gray"])
plt.subplot(1, 3, 2).set_title("GC Node3 Layer 1")
show_graph(data, ["gray", "lightgreen", "lightgreen", "green", "lightgreen", "gray"])
# Third panel continues the 0, 1, ... numbering of the previous titles.
plt.subplot(1, 3, 3).set_title("GC Node3 Layer 2")
show_graph(
    data, ["lightgreen", "lightgreen", "lightgreen", "green", "lightgreen", "gray"]
)


from torchvision import models

alexnet = models.alexnet(weights="AlexNet_Weights.DEFAULT")
print(alexnet)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 123MB/s]

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)


weight_tensor = alexnet.features[0].weight.data  # extract weights
print("Weights shape", weight_tensor.shape)

Weights shape torch.Size([64, 3, 11, 11])


from torchvision import utils

img_grid = utils.make_grid(
    (weight_tensor + 1) / 2, pad_value=1
)  # combine weights from all channel into table, note remapping to (0,1) range
print("Output is CxHxW image", img_grid.shape)

Output is CxHxW image torch.Size([3, 106, 106])


import numpy as np
import matplotlib.pyplot as plt


# Show the grid of first-layer filters as an image.
plt.rcParams["figure.figsize"] = (8, 8)
plt.imshow(
    np.transpose(img_grid, (1, 2, 0))
)  # CHW -> HWC: change channel order for compatibility with numpy & matplotlib
plt.show()


weights_of_conv2_layer = alexnet.features[3].weight.data  # extract weights
print(weights_of_conv2_layer.shape)

torch.Size([192, 64, 5, 5])


first_filter_kernels = weights_of_conv2_layer[0]
print(first_filter_kernels.shape)

torch.Size([64, 5, 5])


# Arrange the kernels of the first filter of the second conv layer into one
# grid image: each kernel becomes a separate single-channel tile.
img_grid = utils.make_grid(
    weights_of_conv2_layer[0].unsqueeze(1), pad_value=1  # add fake channel dim
)

plt.rcParams["figure.figsize"] = (8, 8)
plt.imshow(
    np.transpose((img_grid + 1) / 2, (1, 2, 0))
)  # shift/scale by (x + 1) / 2, then CHW -> HWC for compatibility with numpy
plt.show()


from torch import nn


def module_hook(module: nn.Module, input, output):  # For nn.Module objects only.
    """Forward hook: print a greeting together with the shape of ``output``."""
    activation_shape = output.shape  # shape of the activation map
    print("Hi, i am hook_1 ! ", activation_shape)


handle = alexnet.features[10].register_forward_hook(
    module_hook
)  # attach hook to last conv layer


import torch

out = alexnet(torch.randn(1, 3, 224, 224))

Hi, i am hook_1 !  torch.Size([1, 256, 13, 13])


handle.remove()
out = alexnet(torch.randn(1, 3, 224, 224))


def module_hook(module: nn.Module, input, output):
    """Forward hook: print the activation-map shape and plot the maps as a grid."""
    # Swap the first two axes (B <--> C) so that every channel becomes its own
    # single-channel image for make_grid.
    # Alternative: output.squeeze(0).unsqueeze(1)
    per_channel_maps = output.permute(1, 0, 2, 3)
    print(per_channel_maps.shape)

    grid = utils.make_grid(per_channel_maps, pad_value=10, nrow=16)
    # clamp to [-1, 1], then shift and scale into the 0..1 range
    grid_01 = (grid.clamp(-1, 1) + 1) / 2

    plt.rcParams["figure.figsize"] = (8, 8)
    # CHW -> HWC: change channel order for compatibility with numpy
    plt.imshow(np.transpose(grid_01, (1, 2, 0)))
    plt.show()


handle = alexnet.features[10].register_forward_hook(module_hook)


!wget -q https://edunet.kea.su/repo/EduNet-web_dependencies/dev-2.0/L06/fox.jpg


from PIL import Image

img_fox = Image.open("fox.jpg")  # file fetched by the wget cell above

plt.rcParams["figure.figsize"] = (8, 8)  # figure size for this and later plots
plt.imshow(img_fox)
plt.axis("off")  # hide the axes around the picture
plt.show()


from torchvision import transforms

# Resize to a fixed 256x256 and convert the PIL image to a tensor
transform = transforms.Compose([transforms.Resize((256, 256)), transforms.ToTensor()])

tensor = transform(img_fox)
# unsqueeze(0) adds the batch axis; the forward pass fires the plotting hook
out = alexnet(tensor.unsqueeze(0))

torch.Size([256, 1, 15, 15])


handle.remove()  # detach the plotting hook so later forward passes do not plot


print(alexnet)  # show the layer structure, including the classifier indices

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)


# Replace the final 4096 -> 1000 layer with a pass-through, so the network
# outputs the 4096-dimensional features of the previous layer
alexnet.classifier[6] = nn.Identity()


from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, Resize, Normalize, Compose
from torch.utils.data import DataLoader, random_split

torch.manual_seed(42)  # fixes the random split below

# Upscale to 224 px, convert to tensor, normalize each channel
# (presumably the ImageNet statistics the pretrained network expects)
transform = Compose(
    [
        Resize(224),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Small "train"/"test" subsets are both cut out of the CIFAR-10 test split;
# the third part of the split is discarded
testset = CIFAR10(root="./CIFAR10", train=False, download=True, transform=transform)
train, test, _ = random_split(testset, [512, 128, 10000 - 512 - 128])
train_loader = DataLoader(train, batch_size=128, shuffle=False, drop_last=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./CIFAR10/cifar-10-python.tar.gz

100%|██████████| 170498071/170498071 [00:06<00:00, 27056439.40it/s]

Extracting ./CIFAR10/cifar-10-python.tar.gz to ./CIFAR10


from tqdm import tqdm


def get_embeddings(loader):
    """Run the global ``alexnet`` over ``loader`` and collect its outputs.

    The network is switched to evaluation mode for the duration of the call
    (it contains Dropout layers, which would otherwise randomize the
    features) and its previous mode is restored afterwards. Gradients are
    not tracked.

    Args:
        loader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(embeddings, labels)`` of numpy arrays: one flattened feature
        row per image and the matching 1-D array of labels.
    """
    embeddings = []
    labels = []
    was_training = alexnet.training
    alexnet.eval()
    try:
        with torch.no_grad():
            for img, label in tqdm(loader):
                emb = alexnet(img)
                # keep one flat feature vector per image, on the CPU
                embeddings.append(emb.flatten(start_dim=1).cpu())
                labels.append(label)
    finally:
        alexnet.train(was_training)  # leave the model in the mode it had
    # cat (unlike stack) also accepts a smaller last batch
    embeddings = torch.cat(embeddings).numpy()
    labels = torch.cat(labels).numpy()
    return embeddings, labels


%%time
# Features and labels of the 512-image "train" subset
x, y = get_embeddings(train_loader)

100%|██████████| 4/4 [00:10<00:00,  2.64s/it]

CPU times: user 9.31 s, sys: 1.32 s, total: 10.6 s
Wall time: 10.6 s


print(x.shape, y.shape)  # one 4096-d feature row and one label per image

(512, 4096) (512,)


from sklearn.neighbors import KNeighborsClassifier

# k-NN classifier (k=5) fitted on the extracted features
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(x, y)

KNeighborsClassifier()

KNeighborsClassifier()


%%time

# The 128 held-out images are processed in batches of 32
test_loader = DataLoader(test, batch_size=32, shuffle=False, drop_last=True)
test_emb, gt_labels = get_embeddings(test_loader)

100%|██████████| 4/4 [00:03<00:00,  1.26it/s]

CPU times: user 3.1 s, sys: 3.63 ms, total: 3.11 s
Wall time: 3.17 s


from sklearn.metrics import accuracy_score

y_pred = neigh.predict(test_emb)  # predicted class for every test embedding

accuracy = accuracy_score(gt_labels, y_pred)  # compare with ground-truth labels
print("k-NN accuracy", accuracy)

k-NN accuracy 0.4609375


from torchvision import models

# AlexNet with the default pretrained weights shipped with torchvision
model = models.alexnet(weights="AlexNet_Weights.DEFAULT")


# Freeze model parameters: switch off gradient tracking for every weight
model.requires_grad_(False)


print(model.classifier)  # inspect the classifier head before replacing its last layer

Sequential(
  (0): Dropout(p=0.5, inplace=False)
  (1): Linear(in_features=9216, out_features=4096, bias=True)
  (2): ReLU(inplace=True)
  (3): Dropout(p=0.5, inplace=False)
  (4): Linear(in_features=4096, out_features=4096, bias=True)
  (5): ReLU(inplace=True)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)


from torch import nn

# New head: 4096 features -> 10 classes. The freshly created layer is the only
# one with requires_grad=True (see the listing printed in the next cell)
model.classifier[6] = nn.Linear(4096, 10, bias=True)  # For CIFAR


# Show, for every parameter, whether it will receive gradients
for name, param in model.named_parameters():
    print(name, "\t", param.requires_grad)

features.0.weight 	 False
features.0.bias 	 False
features.3.weight 	 False
features.3.bias 	 False
features.6.weight 	 False
features.6.bias 	 False
features.8.weight 	 False
features.8.bias 	 False
features.10.weight 	 False
features.10.bias 	 False
classifier.1.weight 	 False
classifier.1.bias 	 False
classifier.4.weight 	 False
classifier.4.bias 	 False
classifier.6.weight 	 True
classifier.6.bias 	 True


import torch
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, Resize, Normalize, Compose
from torch.utils.data import DataLoader, random_split

torch.manual_seed(42)  # fixes the random split below

# Upscale to 224 px, convert to tensor, normalize each channel
# (presumably the ImageNet statistics the pretrained network expects)
transform = Compose(
    [
        Resize(224),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Small "train"/"test" subsets are both cut out of the CIFAR-10 test split;
# the third part of the split is discarded
testset = CIFAR10(root="./CIFAR10", train=False, download=True, transform=transform)
train, test, _ = random_split(testset, [512, 128, 10000 - 512 - 128])
train_loader = DataLoader(train, batch_size=128, shuffle=False, drop_last=True)

Files already downloaded and verified


from tqdm import tqdm

# Use the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # move the model weights to the selected device


def train_model(model, num_epochs=1, lr=1e-3):
    """Train ``model`` on the global ``train_loader`` with SGD and cross-entropy.

    Args:
        model: network to optimize; batches are moved to the global ``device``.
        num_epochs: number of passes over ``train_loader``.
        lr: learning rate of the SGD optimizer (momentum is fixed at 0.9).

    After every epoch the loss of its last batch is printed.
    """
    loss_fn = nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    for epoch in range(num_epochs):
        for batch, targets in tqdm(train_loader):
            sgd.zero_grad()  # drop gradients left over from the previous step
            batch, targets = batch.to(device), targets.to(device)
            logits = model(batch)
            loss = loss_fn(logits, targets)
            loss.backward()  # compute gradients of the trainable parameters
            sgd.step()
        print(f"\nEpoch {epoch} Loss {loss.item()}")


# Only classifier[6] requires gradients at this point, so 5 epochs update just the new head
train_model(model, 5)  # train only last layer

100%|██████████| 4/4 [00:01<00:00,  2.90it/s]

Epoch 0 Loss 2.304867744445801

100%|██████████| 4/4 [00:01<00:00,  3.67it/s]

Epoch 1 Loss 1.919417381286621

100%|██████████| 4/4 [00:01<00:00,  3.62it/s]

Epoch 2 Loss 1.4428484439849854

100%|██████████| 4/4 [00:01<00:00,  3.56it/s]

Epoch 3 Loss 1.1411782503128052

100%|██████████| 4/4 [00:01<00:00,  3.68it/s]

Epoch 4 Loss 0.9886724352836609


# Unfreeze model parameters: make every layer trainable for fine-tuning
for param in model.parameters():
    param.requires_grad = True


%%time
# A much smaller learning rate (1e-5 instead of the default 1e-3) is used for this stage
train_model(model, num_epochs=3, lr=1e-5)  # fine tune all layers

100%|██████████| 4/4 [00:01<00:00,  3.02it/s]

Epoch 0 Loss 0.9192925095558167

100%|██████████| 4/4 [00:01<00:00,  3.16it/s]

Epoch 1 Loss 0.8809319138526917

100%|██████████| 4/4 [00:01<00:00,  3.08it/s]

Epoch 2 Loss 0.9289202690124512
CPU times: user 3.22 s, sys: 616 ms, total: 3.84 s
Wall time: 4.07 s


test_loader = DataLoader(test, batch_size=32, shuffle=False, drop_last=True)
y_true = []
y_pred = []
# Evaluation mode switches off Dropout in the classifier, so predictions are
# deterministic; no_grad avoids building the autograd graph during inference.
model.eval()
with torch.no_grad():
    for imgs, labels in test_loader:
        outputs = model(imgs.to(device))
        y_true.append(labels.numpy())
        preds = outputs.argmax(dim=1)  # index of the largest logit = predicted class
        y_pred.append(preds.cpu().numpy())


import numpy as np
from sklearn.metrics import accuracy_score


# Merge the per-batch 1-D arrays into flat arrays. Unlike np.stack,
# np.concatenate does not require all batches to have the same length.
y_true = np.concatenate(y_true)
y_pred = np.concatenate(y_pred)

accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy : {accuracy:.2f}")

Accuracy : 0.62


from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, Resize, Normalize, Compose

# Preprocessing: upscale to 224 px, convert to tensor, normalize each channel
transform = Compose(
    [Resize(224), ToTensor(), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]
)

# train=False selects the CIFAR-10 test split; files are reused when already on disk
testset = CIFAR10(root="./CIFAR10", train=False, download=True, transform=transform)

Files already downloaded and verified


import torch


# setting random seed for reproducible illustrations
torch.manual_seed(42)

# Address of the sample picture used by the augmentation demos below
URL = (
    "https://edunet.kea.su/repo/EduNet-web_dependencies/dev-2.0/L06/capybara_image.jpg"
)
!wget -q $URL -O test.jpg


from PIL import Image
from torchvision import transforms
from IPython.display import display

# The picture was saved as "test.jpg" in the working directory by the wget
# cell above, so it is opened by a relative path instead of a
# Colab-specific absolute one.
input_img = Image.open("test.jpg")
input_img = transforms.Resize(size=300)(input_img)
display(input_img)


import matplotlib.pyplot as plt


def plot_augmented_img(transform, input_img):
    """Show the source image next to its augmented version.

    Args:
        transform: callable that takes the image and returns the augmented one.
        input_img: image to display and to pass through ``transform``.
    """
    _, axes = plt.subplots(1, 2, figsize=(15, 15))
    panels = (
        ("Original img", input_img),
        ("Augmented img", transform(input_img)),
    )
    for axis, (title, picture) in zip(axes, panels):
        axis.imshow(picture)
        axis.set_title(title)
        axis.axis("off")
    plt.show()


# Random rotation; the allowed angle range is passed as degrees=(0, 180)
transform = transforms.RandomRotation(degrees=(0, 180))

plot_augmented_img(transform, input_img)


# Gaussian blur with a (5, 9) kernel; sigma is passed as the range (0.1, 5)
transform = transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5))

plot_augmented_img(transform, input_img)


# RandomErasing is wrapped into ToTensor / ToPILImage: the picture is converted
# to a tensor for erasing and back to PIL for plotting; p=1 is the erase probability
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.RandomErasing(p=1), transforms.ToPILImage()]
)

plot_augmented_img(transform, input_img)


# Same pipeline as above, with the erased region restricted via scale=(0.02, 0.1)
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.RandomErasing(p=1, scale=(0.02, 0.1)),
        transforms.ToPILImage(),
    ]
)

plot_augmented_img(transform, input_img)


# Color jitter configured with brightness=0.5 and hue=0.3
transform = transforms.ColorJitter(brightness=0.5, hue=0.3)

plot_augmented_img(transform, input_img)


# Compose chains the transforms in the listed order:
# blur -> perspective distortion (p=1.0) -> color jitter
transform = transforms.Compose(
    [
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
        transforms.RandomPerspective(distortion_scale=0.5, p=1.0),
        transforms.ColorJitter(brightness=0.5, hue=0.3),
    ]
)

plot_augmented_img(transform, input_img)


# RandomApply: the list of transforms is applied with probability p=0.9.
# NOTE(review): RandomPerspective has no explicit p here, unlike the p=1.0 used
# in the neighbouring cells; presumably the library default is used. Confirm
# that this is intended.
transform = transforms.RandomApply(
    transforms=[
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
        transforms.RandomPerspective(distortion_scale=0.5),
        transforms.ColorJitter(brightness=0.5, hue=0.3),
    ],
    p=0.9,
)

plot_augmented_img(transform, input_img)


# RandomChoice selects one transform from the list on each call.
# NOTE(review): p=[0.2, 0.4, 0.6] sums to 1.2; presumably torchvision treats
# the values as relative weights. Confirm, and consider normalizing them.
transform = transforms.RandomChoice(
    transforms=[
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
        transforms.RandomPerspective(distortion_scale=0.5, p=1.0),
        transforms.ColorJitter(brightness=0.5, hue=0.3),
    ],
    p=[0.2, 0.4, 0.6],
)

# Several calls, since each call may select a different transform
plot_augmented_img(transform, input_img)
plot_augmented_img(transform, input_img)
plot_augmented_img(transform, input_img)


import numpy as np
from PIL import Image


class SaltAndPepperNoise:
    """
    Callable transform that puts "salt and pepper" noise on a PIL image.

    A pixel becomes "salt" (255) with probability ``p`` and, independently,
    "pepper" (0) with probability ``p``; pepper is written last, so it wins
    where both masks overlap. Calling the object returns a new PIL Image.
    """

    def __init__(self, p=0.01):
        # probability of a pixel being selected for each kind of noise
        self.p = p

    def __call__(self, pil_image):
        pixels = np.array(pil_image)

        # the masks cover the two leading axes only, so all channels of a
        # selected pixel are overwritten together
        mask_shape = pixels.shape[:2]
        probabilities = [self.p, 1 - self.p]
        salt_mask = np.random.choice(a=[True, False], size=mask_shape, p=probabilities)
        pepper_mask = np.random.choice(
            a=[True, False], size=mask_shape, p=probabilities
        )

        pixels[salt_mask] = 255
        pixels[pepper_mask] = 0

        return Image.fromarray(pixels)


# p=0.03 is used for the salt mask and, independently, for the pepper mask
transform = SaltAndPepperNoise(p=0.03)

plot_augmented_img(transform, input_img)


# download files
!wget -q "https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/for_transforms.Compose.zip" -O data.zip
!unzip -qn "data.zip"


import glob
from torch.utils.data import Dataset


class AugmentationDataset(Dataset):
    """Dataset of the ``*.jpg`` images located directly inside a directory.

    Args:
        root: path of the directory with the images, with or without a
            trailing path separator.
        transforms: optional callable applied to every opened image.
    """

    def __init__(self, root, transforms=None):
        import os  # imported locally to keep the class self-contained

        # os.path.join makes the pattern correct whether or not root ends with
        # a separator; sorting fixes the otherwise unspecified glob order
        self.img_list = sorted(glob.glob(os.path.join(root, "*.jpg")))
        self.transforms = transforms

    def __len__(self):
        """Return the number of images found."""
        return len(self.img_list)

    def __getitem__(self, i):
        """Open image number ``i`` and return it, transformed if requested."""
        img = Image.open(self.img_list[i])
        if self.transforms is not None:
            img = self.transforms(img)
        return img


# Augmentation pipeline used inside the dataset: fixed-size resize, blur,
# perspective distortion
transform = transforms.Compose(
    [
        transforms.Resize((164, 164)),
        transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
        transforms.RandomPerspective(distortion_scale=0.5),
        # ToTensor() -- NOTE(review): disabled, presumably because the images
        # are only plotted as PIL images below; confirm before enabling
    ]
)

# The archive is unpacked into the working directory by the unzip cell above,
# so the folder is addressed by a relative path instead of a Colab-specific
# absolute one.
augmentated_dataset = AugmentationDataset(
    "for_transforms.Compose/", transforms=transform
)


plt.figure(figsize=(22, 2))
# One subplot per dataset image in a single row; the grid is allocated with
# one column more than there are images
for i, img in enumerate(augmentated_dataset):
    plt.subplot(1, len(augmentated_dataset) + 1, 1 + i)  # subplot indices start at 1
    plt.axis("off")
    plt.imshow(np.array(img))
plt.show()

Введение в сверточные нейронные сети¶

Полносвязная нейронная сеть¶

Нарушение связей между соседними пикселями¶

Рецептивное поле¶

Скользящее окно (фильтр)¶

Фильтры размытия¶

Фильтры для обнаружения паттернов¶

Свертка с фильтром¶

Сверточный слой нейросети¶

Обработка цветных/многоканальных изображений¶

Использование нескольких фильтров¶

Уменьшение размера карты признаков¶

Расширение (padding)¶

Визуализация работы свертки¶

Применение свёрточных слоёв¶

Рецептивные поля нейронов¶

Шаг свёртки (Stride)¶

Уплотнение (Субдискретизация, Pooling)¶

Свёртка фильтром $1\times1$¶

Дополнительная информация¶

Сравнение свёрточного и полносвязного слоев¶

Сколько обучаемых параметров (весов) у свёрточного слоя?¶

Сколько обучаемых параметров у полносвязного слоя?¶

Сколько вычислительных ресурсов требуется полносвязному слою?¶

Сколько ресурсов требуется свёрточному слою?¶

Общая структура свёрточной нейронной сети¶

LeNet: пример архитектуры сверточной сети¶

Другие виды сверток¶

1D¶

Пример работы со спектрограммами растворов в работе выпускника курса¶

Дополнительная информация¶

Свертка через перемножение матриц¶

3D¶

Видеопоток¶

Медицинские 3D снимки¶

Молекулы¶

Графовые свертки¶

PyTorch Geometric¶

Визуализация графа¶

GCNConv¶

Визуализация¶

Визуализация весов¶

Визуализация фильтров промежуточных слоев¶

Визуализация карт активаций¶

Feature extractor¶

Transfer learning¶

Шаг 1. Получение предварительно обученной модели¶

Шаг 2. Заморозка предобученных слоев¶

Шаг 3. Добавление новых обучаемых слоев¶

Шаг 4. Обучение новых слоев¶

Шаг 5. Тонкая настройка модели (fine-tuning)¶

Аугментация¶

Random Rotation¶

Gaussian Blur¶

Random Erasing¶

ColorJitter¶

Совмещаем несколько аугментаций вместе¶

Совмещение нескольких аугментаций случайным образом¶

Random Apply¶

Random Choice¶

Пример создания собственной аугментации¶

Аугментация внутри Dataset¶

Аугментация в реальных задачах¶

Lightning¶

Практические рекомендации¶

Аугментация внутри `Dataset`¶