import torch
import random
import numpy as np

# Seed every random number generator used in this file with the same value
for _seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    _seed_fn(42)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


# Full list of labels
#'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json'
!wget -q https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/imagenet_class_index.json

# https://github.com/ajschumacher/imagen.git
!wget -q https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/imagen.zip
!unzip -q imagen.zip


import json
import pprint
import numpy as np

# Narrow, compact printer so that long dictionaries stay readable
pp = pprint.PrettyPrinter(width=41, compact=True)

# The JSON file maps a class index (string) to [WordNet id, class name]
with open("imagenet_class_index.json") as labels_file:
    imagenet_labels = json.load(labels_file)

# Second column of the [id, name] table: the class names only
label_table = np.array(list(imagenet_labels.values()))
classes = label_table[:, 1]

# Show just the first ten entries of the mapping
first_entries = list(imagenet_labels.items())[:10]
pp.pprint(dict(first_entries))

{'0': ['n01440764', 'tench'],
 '1': ['n01443537', 'goldfish'],
 '2': ['n01484850', 'great_white_shark'],
 '3': ['n01491361', 'tiger_shark'],
 '4': ['n01494475', 'hammerhead'],
 '5': ['n01496331', 'electric_ray'],
 '6': ['n01498041', 'stingray'],
 '7': ['n01514668', 'cock'],
 '8': ['n01514859', 'hen'],
 '9': ['n01518878', 'ostrich']}


from glob import glob
from PIL import Image
from torch.utils.data import Dataset


class MicroImageNet(Dataset):
    """Tiny image dataset built from the JPEG files found in ``imagen/``.

    File names are expected to start with a WordNet id followed by an
    underscore (e.g. ``n02454379_10511_armadillo.jpg``). Only files whose
    WordNet id appears in ``imagenet_class_index.json`` are kept.

    Attributes:
        labels: one ``[class_number, class_name, wordnet_id]`` list per image.
        paths: path of each kept image, in sorted order, parallel to ``labels``.
        num2id: empty dict, kept only for backward compatibility.
    """

    def __init__(self):
        super().__init__()
        # Local import: the file only imports ``os`` further down.
        import os

        self.num2id = {}
        # Load labels: the JSON maps class number -> [WordNet id, class name]
        with open("imagenet_class_index.json") as f:
            imagenet_labels = json.load(f)

        # Re-key by WordNet id, because that is what the file names carry.
        w_net = {}
        for key, entry in imagenet_labels.items():
            w_net[entry[0]] = {"num": int(key), "name": entry[1]}

        self.labels = []
        self.paths = []

        # Load data. Not every WordNet id present in imagen/ exists in
        # imagenet_labels, so images with an unknown id are filtered out.
        for path in sorted(glob("imagen/*.jpg")):
            # Take the id from the file name itself instead of slicing a
            # fixed-length directory prefix off the path.
            wn_id = os.path.basename(path).split("_")[0]
            info = w_net.get(wn_id)
            if info is None:
                continue
            self.labels.append([info["num"], info["name"], wn_id])
            self.paths.append(path)

    def __getitem__(self, idx):
        """Return ``(PIL image, class number)`` for the image at ``idx``."""
        im = Image.open(self.paths[idx])
        class_num = self.labels[idx][0]
        return im, class_num

    def __len__(self):
        """Return the number of images that were kept."""
        return len(self.paths)


# Build the dataset once; the constructor reads the label file and scans imagen/
microImgNet = MicroImageNet()


import matplotlib.pyplot as plt

# Default figure size used by every plot created after this line
plt.rcParams["figure.figsize"] = (15, 10)


def show(img, label_1, num, label_2=""):
    """Draw ``img`` in cell ``num`` of a 2x3 grid of subplots.

    Args:
        img: image accepted by ``imshow``.
        label_1: text shown as the subplot title.
        num: zero-based position in the grid (0..5).
        label_2: optional caption shown below the image as the x-label.
    """
    ax = plt.subplot(2, 3, num + 1)
    ax.imshow(img)
    ax.set_title(label_1)
    ax.set_xlabel(label_2)
    # Hide ticks and the frame one by one. Switching the whole axis off would
    # also hide the x-label, so the caption in label_2 would never be drawn.
    ax.set_xticks([])
    ax.set_yticks([])
    for spine in ax.spines.values():
        spine.set_visible(False)


# Preview every sixth image from index 36 to 66, one per cell of the 2x3 grid
for slot, sample_idx in enumerate(range(36, 72, 6)):
    img, label = microImgNet[sample_idx]
    name = microImgNet.labels[sample_idx][1]  # human-readable class name
    show(img, name, slot)


from torchvision import models

# AlexNet with torchvision's default pretrained weights (downloaded on first use).
# A freshly built module is in training mode; call alexnet.eval() before inference.
alexnet = models.alexnet(weights="AlexNet_Weights.DEFAULT")

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:02<00:00, 116MB/s]


from torchsummary import summary

print("AlexNet architecture")
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line after the table.
summary(alexnet, (3, 224, 224), device="cpu")
# The module's own repr lists the layers with their hyper-parameters
print(alexnet)

AlexNet architecture
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1           [-1, 64, 55, 55]          23,296
              ReLU-2           [-1, 64, 55, 55]               0
         MaxPool2d-3           [-1, 64, 27, 27]               0
            Conv2d-4          [-1, 192, 27, 27]         307,392
              ReLU-5          [-1, 192, 27, 27]               0
         MaxPool2d-6          [-1, 192, 13, 13]               0
            Conv2d-7          [-1, 384, 13, 13]         663,936
              ReLU-8          [-1, 384, 13, 13]               0
            Conv2d-9          [-1, 256, 13, 13]         884,992
             ReLU-10          [-1, 256, 13, 13]               0
           Conv2d-11          [-1, 256, 13, 13]         590,080
             ReLU-12          [-1, 256, 13, 13]               0
        MaxPool2d-13            [-1, 256, 6, 6]               0
AdaptiveAvgPool2d-14            [-1, 256, 6, 6]               0
          Dropout-15                 [-1, 9216]               0
           Linear-16                 [-1, 4096]      37,752,832
             ReLU-17                 [-1, 4096]               0
          Dropout-18                 [-1, 4096]               0
           Linear-19                 [-1, 4096]      16,781,312
             ReLU-20                 [-1, 4096]               0
           Linear-21                 [-1, 1000]       4,097,000
================================================================
Total params: 61,100,840
Trainable params: 61,100,840
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 8.38
Params size (MB): 233.08
Estimated Total Size (MB): 242.03
----------------------------------------------------------------
None
AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace=True)
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace=True)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)


import torch
import torchvision.transforms.functional as F


def img2tensor(img):
    """Convert an image to a tensor and normalize each channel.

    The mean and std triples are presumably the usual ImageNet channel
    statistics expected by the pretrained torchvision models (TODO confirm).
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)
    return F.normalize(F.to_tensor(img), channel_mean, channel_std)


def catId2names(nums, labels=None):
    """Turn class ids into a comma-separated string of class names.

    The names are returned in the reverse of the input order, so ids sorted
    by ascending score come out best-first.

    Args:
        nums: iterable of scalar tensors (e.g. a 1-D integer tensor), each
            holding a class id.
        labels: optional mapping ``str(class id) -> [wordnet_id, name]``.
            Defaults to the module-level ``imagenet_labels``.

    Returns:
        The class names joined with ``", "``; an empty string for no ids.
    """
    if labels is None:
        labels = imagenet_labels
    titles = [labels[str(num.item())][1] for num in nums]
    # Reverse once, after all names are collected. Reversing inside the loop
    # would shuffle the order instead of flipping it.
    titles.reverse()
    return ", ".join(titles)


# Inference only: switch Dropout to evaluation behaviour so that the
# predictions do not change randomly from run to run.
alexnet.eval()

for i in range(6, 12):
    img, label = microImgNet[i * 6]
    tensor = img2tensor(img)
    # No gradients are needed for prediction, so do not build the graph
    with torch.no_grad():
        out = alexnet(tensor.unsqueeze(0))  # Add batch dimension
    labels_num = torch.argsort(out[0])  # Class ids in ascending score order
    predicted = catId2names(labels_num[-5:])  # Five highest-scoring classes
    name = microImgNet.labels[i * 6][1]  # Ground-truth class name
    show(img, name, i - 6, predicted)


from torchvision import models

vgg = models.vgg16(
    weights=None
)  # No pretrained weights are loaded; pass weights="DEFAULT" to use VGG for predictions
print(vgg)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)


import torch.nn as nn

# Square kernel sizes to compare
conv_sizes = [11, 7, 5, 3]

for conv_size in conv_sizes:
    # 3 input channels -> 64 output channels with the current kernel size
    conv_layer = nn.Conv2d(3, 64, conv_size, stride=1, padding=1)
    print("Convolution size: %ix%i" % (conv_size, conv_size))
    for param_name, param in conv_layer.named_parameters():
        # Element count times 4 bytes (assumes 4-byte floats), shown in KiB
        size_kb = (param.numel() * 4) / 1024
        print("Memory reqired for %s: %.2f kb" % (param_name, size_kb))

Convolution size: 11x11
Memory reqired for weight: 90.75 kb
Memory reqired for bias: 0.25 kb
Convolution size: 7x7
Memory reqired for weight: 36.75 kb
Memory reqired for bias: 0.25 kb
Convolution size: 5x5
Memory reqired for weight: 18.75 kb
Memory reqired for bias: 0.25 kb
Convolution size: 3x3
Memory reqired for weight: 6.75 kb
Memory reqired for bias: 0.25 kb


!nvidia-smi

Fri Jan 26 12:20:36 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   60C    P8              10W /  70W |      3MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                                         
+---------------------------------------------------------------------------------------+
| Processes:                                                                            |
|  GPU   GI   CI        PID   Type   Process name                            GPU Memory |
|        ID   ID                                                             Usage      |
|=======================================================================================|
|  No running processes found                                                           |
+---------------------------------------------------------------------------------------+


!pip install -q GPUtil

  Preparing metadata (setup.py) ... done
  Building wheel for GPUtil (setup.py) ... done


import os
import psutil
import GPUtil as GPU


def gpu_usage():
    """Print the memory statistics of the first GPU reported by GPUtil.

    Returns:
        ``False`` when GPUtil reports no GPU; otherwise ``None`` after
        printing free, used, utilization and total memory on one line.
    """
    GPUs = GPU.getGPUs()
    # XXX: only one GPU on Colab and isn’t guaranteed
    if not GPUs:
        return False
    gpu = GPUs[0]
    # The line continuations are part of the output layout: each one embeds
    # the next line's leading spaces into the printed string.
    print(
        f"GPU RAM Free: {gpu.memoryFree:.0f}MB \
    | Used: {gpu.memoryUsed:.0f}MB \
    | Util {gpu.memoryUtil*100:3.0f}% \
    | Total {gpu.memoryTotal:.0f}MB"
    )


import torch
import torchvision


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Randomly initialized VGG19; only its memory footprint matters here
vgg19 = torchvision.models.vgg19(weights=None, progress=True)
# Assigning `module.requires_grad = True` only creates a plain attribute;
# requires_grad_() is the call that actually sets the flag on every parameter.
vgg19.requires_grad_(True)
vgg19.to(device)

gpu_usage()  # Common GPU info

vgg19.train()

# Run one forward pass per batch size and report GPU memory after each
for batch_size in [1, 8, 16, 32, 64]:
    input_random = torch.rand(batch_size, 3, 224, 224, device=device)
    out = vgg19(input_random)
    print("Batch size", batch_size)
    gpu_usage()

GPU RAM Free: 14425MB     | Used: 677MB     | Util   4%     | Total 15360MB
Batch size 1
GPU RAM Free: 14289MB     | Used: 813MB     | Util   5%     | Total 15360MB
Batch size 8
GPU RAM Free: 13475MB     | Used: 1627MB     | Util  11%     | Total 15360MB
Batch size 16
GPU RAM Free: 12395MB     | Used: 2707MB     | Util  18%     | Total 15360MB
Batch size 32
GPU RAM Free: 9843MB     | Used: 5259MB     | Util  34%     | Total 15360MB
Batch size 64
GPU RAM Free: 5529MB     | Used: 9573MB     | Util  62%     | Total 15360MB


# Memory report after the loop; input_random and out still reference the last batch
gpu_usage()

GPU RAM Free: 5529MB     | Used: 9573MB     | Util  62%     | Total 15360MB


input_random = None  # drop the reference to the last input batch
out = None  # drop the reference to the last model output
gpu_usage()  # report memory again after dropping both references

GPU RAM Free: 5529MB     | Used: 9573MB     | Util  62%     | Total 15360MB


# Ask PyTorch's caching allocator to release the cached blocks it no longer uses
torch.cuda.empty_cache()
gpu_usage()

GPU RAM Free: 8267MB     | Used: 6835MB     | Util  44%     | Total 15360MB


vgg19 = None  # drop the reference to the model that was moved to `device`
gpu_usage()

GPU RAM Free: 8267MB     | Used: 6835MB     | Util  44%     | Total 15360MB


# Release the cached blocks again, now that the model is no longer referenced
torch.cuda.empty_cache()


gpu_usage()  # final memory report

GPU RAM Free: 14369MB     | Used: 733MB     | Util   5%     | Total 15360MB


import torchvision

# Model source: https://pytorch.org/vision/stable/_modules/torchvision/models/googlenet.html#googlenet
# No pretrained weights are requested; init_weights=True is passed through to the model constructor
googlenet = torchvision.models.googlenet(init_weights=True)
print(googlenet)

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(192, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (inception3b): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(128, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(256, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(32, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (maxpool3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception4a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(480, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(480, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(96, 208, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(208, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(480, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(16, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(480, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (inception4b): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(512, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(160, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(512, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(112, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(112, 224, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(224, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(512, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(24, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (inception4c): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(512, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(24, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (inception4d): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(512, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(112, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(512, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(144, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(144, 288, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(288, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(512, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (inception4e): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(528, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(528, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(160, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(160, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(320, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(528, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(32, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(528, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (maxpool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception5a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(832, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(832, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(160, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(160, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(320, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(832, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(32, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(832, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (inception5b): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(832, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (branch2): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(832, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(384, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch3): Sequential(
      (0): BasicConv2d(
        (conv): Conv2d(832, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicConv2d(
        (conv): Conv2d(48, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (branch4): Sequential(
      (0): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True)
      (1): BasicConv2d(
        (conv): Conv2d(832, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
  )
  (aux1): InceptionAux(
    (conv): BasicConv2d(
      (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (fc1): Linear(in_features=2048, out_features=1024, bias=True)
    (fc2): Linear(in_features=1024, out_features=1000, bias=True)
    (dropout): Dropout(p=0.7, inplace=False)
  )
  (aux2): InceptionAux(
    (conv): BasicConv2d(
      (conv): Conv2d(528, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    )
    (fc1): Linear(in_features=2048, out_features=1024, bias=True)
    (fc2): Linear(in_features=1024, out_features=1000, bias=True)
    (dropout): Dropout(p=0.7, inplace=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (dropout): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=1024, out_features=1000, bias=True)
)


# Global average pooling: reduce every channel's spatial map to a single value
gap = torch.nn.AdaptiveAvgPool2d(1)
dummy_input = torch.randn(1, 3, 6, 6)

out = gap(dummy_input)
print("Raw out shape", out.shape)

# Collapse the trailing 1x1 spatial dimensions, keeping the batch dimension
out = out.flatten(start_dim=1)
print("Flatten out shape", out.shape)

Raw out shape torch.Size([1, 3, 1, 1])
Flatten out shape torch.Size([1, 3])


import torch
import torch.nn as nn
from PIL import Image


def file2tensor(filename):
    """Load an image file and return it as a channel-normalized tensor.

    Args:
        filename: path of the image file to open.

    Returns:
        The tensor produced by ``to_tensor`` and then normalized with the
        fixed per-channel mean and std below.
    """
    # Image.open keeps the file open until the image is closed. The context
    # manager closes it once the pixels have been copied into the tensor.
    with Image.open(filename) as img:
        t = torchvision.transforms.functional.to_tensor(img)
    t = torchvision.transforms.functional.normalize(
        t, (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
    )
    return t


class CNNfromHW(nn.Module):
    """Two-convolution classifier that accepts images of any spatial size.

    Global average pooling reduces the feature map to one value per channel,
    so the final linear layer always receives 32 features regardless of the
    input height and width.
    """

    def __init__(self, conv_module=None):
        # conv_module is accepted for interface compatibility and is not used
        super().__init__()
        self.activation = nn.ReLU()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, padding=2)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(32, 10)

    def forward(self, x):
        """Return 10 class scores per sample, printing input and output shapes."""
        print("Input shape", x.shape)
        # conv1 keeps H x W (16 channels); the pooling halves both dimensions
        halved = self.pool(self.conv1(x))
        # conv2 keeps H/2 x W/2 (32 channels), followed by the ReLU
        features = self.activation(self.conv2(halved))
        # Any spatial size -> 32 x 1 x 1
        pooled = self.gap(features)
        scores = self.fc(pooled.flatten(1))
        print("Output shape", scores.shape)
        return scores


# First pass: a random 32x32 RGB batch of one image
print("CIFAR10 like")
input_random = torch.rand(1, 3, 32, 32)
model_with_gap = CNNfromHW()
out = model_with_gap(input_random)


# Second pass: the same model instance on an image file of a different size
print("Arbitrary size")
# Different sizes work too!
aramdillo_t = file2tensor("imagen/n02454379_10511_armadillo.jpg")
out = model_with_gap(aramdillo_t.unsqueeze(0))  # add the batch dimension

CIFAR10 like
Input shape torch.Size([1, 3, 32, 32])
Output shape torch.Size([1, 10])
Arbitrary size
Input shape torch.Size([1, 3, 500, 500])
Output shape torch.Size([1, 10])


import inspect
import torchvision.models.resnet as resnet

# BasicBlock: print the source code of its forward() method
code = inspect.getsource(resnet.BasicBlock.forward)
print(code)

    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


from torchvision import models

# Build ResNet-18 without loading pretrained weights and show its second stage.
# NOTE(review): this rebinds `resnet`, which earlier referred to the module
# imported via `import torchvision.models.resnet as resnet`; from here on
# `resnet` is the model instance, not the module.
resnet = models.resnet18(weights=None)
print(resnet.layer2)

Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (downsample): Sequential(
      (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): BasicBlock(
    (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)


# CPU test
import time
import torch
from torch import nn


def time_synchronized():
    """Return the current wall-clock time (``time.time()``) in seconds.

    When CUDA is available, ``torch.cuda.synchronize()`` is called first so
    that previously queued GPU work has finished before the clock is read.
    """
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


# Benchmark input: 8 feature maps with 512 channels, 112x112 each (on CPU).
input_random = torch.rand(8, 512, 112, 112)

# Build each layer BEFORE starting the timer, so that weight allocation and
# initialization are not counted as convolution time.
normal_conv = nn.Conv2d(512, 1024, 3, groups=1)
start = time_synchronized()
out = normal_conv(input_random)
tm = time_synchronized() - start
print(f"Normal convolution take  {tm} sec.")

# groups=64 splits the 512 input / 1024 output channels into 64 independent
# groups (8 in -> 16 out each), so the layer has 64x fewer weights.
groupped_conv = nn.Conv2d(512, 1024, 3, groups=64)
start = time_synchronized()
out = groupped_conv(input_random)
tm = time_synchronized() - start
print(f"Groupped convolution take  {tm} sec.")

Normal convolution take  8.310923337936401 sec.
Groupped convolution take  1.1188368797302246 sec.


# GPU test
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Copy the input to the target device once, outside the timed regions:
# otherwise the host-to-device transfer is counted as convolution time.
input_device = input_random.to(device)

# Layers are created and moved to the device before the timer starts, so only
# the forward pass is measured.
# NOTE(review): the first CUDA convolution may still include one-time library
# initialization; repeat the measurement for steady-state numbers.
normal_conv = nn.Conv2d(512, 1024, 3, groups=1).to(device)
start = time_synchronized()
out = normal_conv(input_device)
tm = time_synchronized() - start
print(f"Normal convolution take  {tm} sec.")

groupped_conv = nn.Conv2d(512, 1024, 3, groups=64).to(device)
start = time_synchronized()
out = groupped_conv(input_device)
tm = time_synchronized() - start
print(f"Groupped convolution take  {tm} sec.")

Normal convolution take  0.32497549057006836 sec.
Groupped convolution take  0.061997413635253906 sec.


from torchvision import models
from torchsummary import summary

# ResNeXt-50 (32x4d) built without loading pretrained weights.
resnext = models.resnext50_32x4d(weights=None)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
summary(resnext, (3, 224, 224), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5          [-1, 128, 56, 56]           8,192
       BatchNorm2d-6          [-1, 128, 56, 56]             256
              ReLU-7          [-1, 128, 56, 56]               0
            Conv2d-8          [-1, 128, 56, 56]           4,608
       BatchNorm2d-9          [-1, 128, 56, 56]             256
             ReLU-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]          32,768
      BatchNorm2d-12          [-1, 256, 56, 56]             512
           Conv2d-13          [-1, 256, 56, 56]          16,384
      BatchNorm2d-14          [-1, 256, 56, 56]             512
             ReLU-15          [-1, 256, 56, 56]               0
       Bottleneck-16          [-1, 256, 56, 56]               0
           Conv2d-17          [-1, 128, 56, 56]          32,768
      BatchNorm2d-18          [-1, 128, 56, 56]             256
             ReLU-19          [-1, 128, 56, 56]               0
           Conv2d-20          [-1, 128, 56, 56]           4,608
      BatchNorm2d-21          [-1, 128, 56, 56]             256
             ReLU-22          [-1, 128, 56, 56]               0
           Conv2d-23          [-1, 256, 56, 56]          32,768
      BatchNorm2d-24          [-1, 256, 56, 56]             512
             ReLU-25          [-1, 256, 56, 56]               0
       Bottleneck-26          [-1, 256, 56, 56]               0
           Conv2d-27          [-1, 128, 56, 56]          32,768
      BatchNorm2d-28          [-1, 128, 56, 56]             256
             ReLU-29          [-1, 128, 56, 56]               0
           Conv2d-30          [-1, 128, 56, 56]           4,608
      BatchNorm2d-31          [-1, 128, 56, 56]             256
             ReLU-32          [-1, 128, 56, 56]               0
           Conv2d-33          [-1, 256, 56, 56]          32,768
      BatchNorm2d-34          [-1, 256, 56, 56]             512
             ReLU-35          [-1, 256, 56, 56]               0
       Bottleneck-36          [-1, 256, 56, 56]               0
           Conv2d-37          [-1, 256, 56, 56]          65,536
      BatchNorm2d-38          [-1, 256, 56, 56]             512
             ReLU-39          [-1, 256, 56, 56]               0
           Conv2d-40          [-1, 256, 28, 28]          18,432
      BatchNorm2d-41          [-1, 256, 28, 28]             512
             ReLU-42          [-1, 256, 28, 28]               0
           Conv2d-43          [-1, 512, 28, 28]         131,072
      BatchNorm2d-44          [-1, 512, 28, 28]           1,024
           Conv2d-45          [-1, 512, 28, 28]         131,072
      BatchNorm2d-46          [-1, 512, 28, 28]           1,024
             ReLU-47          [-1, 512, 28, 28]               0
       Bottleneck-48          [-1, 512, 28, 28]               0
           Conv2d-49          [-1, 256, 28, 28]         131,072
      BatchNorm2d-50          [-1, 256, 28, 28]             512
             ReLU-51          [-1, 256, 28, 28]               0
           Conv2d-52          [-1, 256, 28, 28]          18,432
      BatchNorm2d-53          [-1, 256, 28, 28]             512
             ReLU-54          [-1, 256, 28, 28]               0
           Conv2d-55          [-1, 512, 28, 28]         131,072
      BatchNorm2d-56          [-1, 512, 28, 28]           1,024
             ReLU-57          [-1, 512, 28, 28]               0
       Bottleneck-58          [-1, 512, 28, 28]               0
           Conv2d-59          [-1, 256, 28, 28]         131,072
      BatchNorm2d-60          [-1, 256, 28, 28]             512
             ReLU-61          [-1, 256, 28, 28]               0
           Conv2d-62          [-1, 256, 28, 28]          18,432
      BatchNorm2d-63          [-1, 256, 28, 28]             512
             ReLU-64          [-1, 256, 28, 28]               0
           Conv2d-65          [-1, 512, 28, 28]         131,072
      BatchNorm2d-66          [-1, 512, 28, 28]           1,024
             ReLU-67          [-1, 512, 28, 28]               0
       Bottleneck-68          [-1, 512, 28, 28]               0
           Conv2d-69          [-1, 256, 28, 28]         131,072
      BatchNorm2d-70          [-1, 256, 28, 28]             512
             ReLU-71          [-1, 256, 28, 28]               0
           Conv2d-72          [-1, 256, 28, 28]          18,432
      BatchNorm2d-73          [-1, 256, 28, 28]             512
             ReLU-74          [-1, 256, 28, 28]               0
           Conv2d-75          [-1, 512, 28, 28]         131,072
      BatchNorm2d-76          [-1, 512, 28, 28]           1,024
             ReLU-77          [-1, 512, 28, 28]               0
       Bottleneck-78          [-1, 512, 28, 28]               0
           Conv2d-79          [-1, 512, 28, 28]         262,144
      BatchNorm2d-80          [-1, 512, 28, 28]           1,024
             ReLU-81          [-1, 512, 28, 28]               0
           Conv2d-82          [-1, 512, 14, 14]          73,728
      BatchNorm2d-83          [-1, 512, 14, 14]           1,024
             ReLU-84          [-1, 512, 14, 14]               0
           Conv2d-85         [-1, 1024, 14, 14]         524,288
      BatchNorm2d-86         [-1, 1024, 14, 14]           2,048
           Conv2d-87         [-1, 1024, 14, 14]         524,288
      BatchNorm2d-88         [-1, 1024, 14, 14]           2,048
             ReLU-89         [-1, 1024, 14, 14]               0
       Bottleneck-90         [-1, 1024, 14, 14]               0
           Conv2d-91          [-1, 512, 14, 14]         524,288
      BatchNorm2d-92          [-1, 512, 14, 14]           1,024
             ReLU-93          [-1, 512, 14, 14]               0
           Conv2d-94          [-1, 512, 14, 14]          73,728
      BatchNorm2d-95          [-1, 512, 14, 14]           1,024
             ReLU-96          [-1, 512, 14, 14]               0
           Conv2d-97         [-1, 1024, 14, 14]         524,288
      BatchNorm2d-98         [-1, 1024, 14, 14]           2,048
             ReLU-99         [-1, 1024, 14, 14]               0
      Bottleneck-100         [-1, 1024, 14, 14]               0
          Conv2d-101          [-1, 512, 14, 14]         524,288
     BatchNorm2d-102          [-1, 512, 14, 14]           1,024
            ReLU-103          [-1, 512, 14, 14]               0
          Conv2d-104          [-1, 512, 14, 14]          73,728
     BatchNorm2d-105          [-1, 512, 14, 14]           1,024
            ReLU-106          [-1, 512, 14, 14]               0
          Conv2d-107         [-1, 1024, 14, 14]         524,288
     BatchNorm2d-108         [-1, 1024, 14, 14]           2,048
            ReLU-109         [-1, 1024, 14, 14]               0
      Bottleneck-110         [-1, 1024, 14, 14]               0
          Conv2d-111          [-1, 512, 14, 14]         524,288
     BatchNorm2d-112          [-1, 512, 14, 14]           1,024
            ReLU-113          [-1, 512, 14, 14]               0
          Conv2d-114          [-1, 512, 14, 14]          73,728
     BatchNorm2d-115          [-1, 512, 14, 14]           1,024
            ReLU-116          [-1, 512, 14, 14]               0
          Conv2d-117         [-1, 1024, 14, 14]         524,288
     BatchNorm2d-118         [-1, 1024, 14, 14]           2,048
            ReLU-119         [-1, 1024, 14, 14]               0
      Bottleneck-120         [-1, 1024, 14, 14]               0
          Conv2d-121          [-1, 512, 14, 14]         524,288
     BatchNorm2d-122          [-1, 512, 14, 14]           1,024
            ReLU-123          [-1, 512, 14, 14]               0
          Conv2d-124          [-1, 512, 14, 14]          73,728
     BatchNorm2d-125          [-1, 512, 14, 14]           1,024
            ReLU-126          [-1, 512, 14, 14]               0
          Conv2d-127         [-1, 1024, 14, 14]         524,288
     BatchNorm2d-128         [-1, 1024, 14, 14]           2,048
            ReLU-129         [-1, 1024, 14, 14]               0
      Bottleneck-130         [-1, 1024, 14, 14]               0
          Conv2d-131          [-1, 512, 14, 14]         524,288
     BatchNorm2d-132          [-1, 512, 14, 14]           1,024
            ReLU-133          [-1, 512, 14, 14]               0
          Conv2d-134          [-1, 512, 14, 14]          73,728
     BatchNorm2d-135          [-1, 512, 14, 14]           1,024
            ReLU-136          [-1, 512, 14, 14]               0
          Conv2d-137         [-1, 1024, 14, 14]         524,288
     BatchNorm2d-138         [-1, 1024, 14, 14]           2,048
            ReLU-139         [-1, 1024, 14, 14]               0
      Bottleneck-140         [-1, 1024, 14, 14]               0
          Conv2d-141         [-1, 1024, 14, 14]       1,048,576
     BatchNorm2d-142         [-1, 1024, 14, 14]           2,048
            ReLU-143         [-1, 1024, 14, 14]               0
          Conv2d-144           [-1, 1024, 7, 7]         294,912
     BatchNorm2d-145           [-1, 1024, 7, 7]           2,048
            ReLU-146           [-1, 1024, 7, 7]               0
          Conv2d-147           [-1, 2048, 7, 7]       2,097,152
     BatchNorm2d-148           [-1, 2048, 7, 7]           4,096
          Conv2d-149           [-1, 2048, 7, 7]       2,097,152
     BatchNorm2d-150           [-1, 2048, 7, 7]           4,096
            ReLU-151           [-1, 2048, 7, 7]               0
      Bottleneck-152           [-1, 2048, 7, 7]               0
          Conv2d-153           [-1, 1024, 7, 7]       2,097,152
     BatchNorm2d-154           [-1, 1024, 7, 7]           2,048
            ReLU-155           [-1, 1024, 7, 7]               0
          Conv2d-156           [-1, 1024, 7, 7]         294,912
     BatchNorm2d-157           [-1, 1024, 7, 7]           2,048
            ReLU-158           [-1, 1024, 7, 7]               0
          Conv2d-159           [-1, 2048, 7, 7]       2,097,152
     BatchNorm2d-160           [-1, 2048, 7, 7]           4,096
            ReLU-161           [-1, 2048, 7, 7]               0
      Bottleneck-162           [-1, 2048, 7, 7]               0
          Conv2d-163           [-1, 1024, 7, 7]       2,097,152
     BatchNorm2d-164           [-1, 1024, 7, 7]           2,048
            ReLU-165           [-1, 1024, 7, 7]               0
          Conv2d-166           [-1, 1024, 7, 7]         294,912
     BatchNorm2d-167           [-1, 1024, 7, 7]           2,048
            ReLU-168           [-1, 1024, 7, 7]               0
          Conv2d-169           [-1, 2048, 7, 7]       2,097,152
     BatchNorm2d-170           [-1, 2048, 7, 7]           4,096
            ReLU-171           [-1, 2048, 7, 7]               0
      Bottleneck-172           [-1, 2048, 7, 7]               0
AdaptiveAvgPool2d-173           [-1, 2048, 1, 1]               0
          Linear-174                 [-1, 1000]       2,049,000
================================================================
Total params: 25,028,904
Trainable params: 25,028,904
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 361.78
Params size (MB): 95.48
Estimated Total Size (MB): 457.83
----------------------------------------------------------------
None


## Custom SE block


class SE_Block(nn.Module):
    """Squeeze-and-Excitation block: rescales every channel by a learned gate.

    credits: https://github.com/moskomule/senet.pytorch/blob/master/senet/se_module.py#L4

    Args:
        c: number of channels of the incoming feature map.
        r: reduction ratio; the hidden layer has ``c // r`` units.
    """

    def __init__(self, c, r=16):
        super().__init__()
        hidden = c // r  # width of the bottleneck between the two linear layers
        # Squeeze: average every channel down to a single value.
        self.squeeze = nn.AdaptiveAvgPool2d(1)
        # Excitation: bottleneck MLP ending in a sigmoid, one gate per channel.
        self.excitation = nn.Sequential(
            nn.Linear(c, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, c, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` with each channel multiplied by its gate; prints the gate shape."""
        # The four-way unpack requires a 4-D input.
        batch, channels, _height, _width = x.shape
        pooled = self.squeeze(x).reshape(batch, channels)
        gates = self.excitation(pooled)[:, :, None, None]
        print("Coefficients ", gates.shape)
        # Broadcasting stretches the (batch, channels, 1, 1) gates over H x W.
        return x * gates


# Random feature map: batch of 16, 256 channels, 7x7 spatial size.
dummy = torch.randn(16, 256, 7, 7)

se_block = SE_Block(256)  # for 256 channels
# Compare the total magnitude before and after the block: every gate is a
# sigmoid output in (0, 1), so the block can only scale activations down.
print("Absolute sum", dummy.abs().sum().item())
se_out = se_block(dummy)
print("Sum after se_block", se_out.abs().sum().item())

Absolute sum 160095.890625
Coefficients  torch.Size([16, 256, 1, 1])
Sum after se_block 80070.8125


from torchsummary import summary
from torchvision.models import efficientnet_b0

# EfficientNet-B0 built with the constructor's default arguments.
en_b0 = efficientnet_b0()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
summary(en_b0, (3, 224, 224), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              SiLU-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
       BatchNorm2d-5         [-1, 32, 112, 112]              64
              SiLU-6         [-1, 32, 112, 112]               0
 AdaptiveAvgPool2d-7             [-1, 32, 1, 1]               0
            Conv2d-8              [-1, 8, 1, 1]             264
              SiLU-9              [-1, 8, 1, 1]               0
           Conv2d-10             [-1, 32, 1, 1]             288
          Sigmoid-11             [-1, 32, 1, 1]               0
SqueezeExcitation-12         [-1, 32, 112, 112]               0
           Conv2d-13         [-1, 16, 112, 112]             512
      BatchNorm2d-14         [-1, 16, 112, 112]              32
           MBConv-15         [-1, 16, 112, 112]               0
           Conv2d-16         [-1, 96, 112, 112]           1,536
      BatchNorm2d-17         [-1, 96, 112, 112]             192
             SiLU-18         [-1, 96, 112, 112]               0
           Conv2d-19           [-1, 96, 56, 56]             864
      BatchNorm2d-20           [-1, 96, 56, 56]             192
             SiLU-21           [-1, 96, 56, 56]               0
AdaptiveAvgPool2d-22             [-1, 96, 1, 1]               0
           Conv2d-23              [-1, 4, 1, 1]             388
             SiLU-24              [-1, 4, 1, 1]               0
           Conv2d-25             [-1, 96, 1, 1]             480
          Sigmoid-26             [-1, 96, 1, 1]               0
SqueezeExcitation-27           [-1, 96, 56, 56]               0
           Conv2d-28           [-1, 24, 56, 56]           2,304
      BatchNorm2d-29           [-1, 24, 56, 56]              48
           MBConv-30           [-1, 24, 56, 56]               0
           Conv2d-31          [-1, 144, 56, 56]           3,456
      BatchNorm2d-32          [-1, 144, 56, 56]             288
             SiLU-33          [-1, 144, 56, 56]               0
           Conv2d-34          [-1, 144, 56, 56]           1,296
      BatchNorm2d-35          [-1, 144, 56, 56]             288
             SiLU-36          [-1, 144, 56, 56]               0
AdaptiveAvgPool2d-37            [-1, 144, 1, 1]               0
           Conv2d-38              [-1, 6, 1, 1]             870
             SiLU-39              [-1, 6, 1, 1]               0
           Conv2d-40            [-1, 144, 1, 1]           1,008
          Sigmoid-41            [-1, 144, 1, 1]               0
SqueezeExcitation-42          [-1, 144, 56, 56]               0
           Conv2d-43           [-1, 24, 56, 56]           3,456
      BatchNorm2d-44           [-1, 24, 56, 56]              48
  StochasticDepth-45           [-1, 24, 56, 56]               0
           MBConv-46           [-1, 24, 56, 56]               0
           Conv2d-47          [-1, 144, 56, 56]           3,456
      BatchNorm2d-48          [-1, 144, 56, 56]             288
             SiLU-49          [-1, 144, 56, 56]               0
           Conv2d-50          [-1, 144, 28, 28]           3,600
      BatchNorm2d-51          [-1, 144, 28, 28]             288
             SiLU-52          [-1, 144, 28, 28]               0
AdaptiveAvgPool2d-53            [-1, 144, 1, 1]               0
           Conv2d-54              [-1, 6, 1, 1]             870
             SiLU-55              [-1, 6, 1, 1]               0
           Conv2d-56            [-1, 144, 1, 1]           1,008
          Sigmoid-57            [-1, 144, 1, 1]               0
SqueezeExcitation-58          [-1, 144, 28, 28]               0
           Conv2d-59           [-1, 40, 28, 28]           5,760
      BatchNorm2d-60           [-1, 40, 28, 28]              80
           MBConv-61           [-1, 40, 28, 28]               0
           Conv2d-62          [-1, 240, 28, 28]           9,600
      BatchNorm2d-63          [-1, 240, 28, 28]             480
             SiLU-64          [-1, 240, 28, 28]               0
           Conv2d-65          [-1, 240, 28, 28]           6,000
      BatchNorm2d-66          [-1, 240, 28, 28]             480
             SiLU-67          [-1, 240, 28, 28]               0
AdaptiveAvgPool2d-68            [-1, 240, 1, 1]               0
           Conv2d-69             [-1, 10, 1, 1]           2,410
             SiLU-70             [-1, 10, 1, 1]               0
           Conv2d-71            [-1, 240, 1, 1]           2,640
          Sigmoid-72            [-1, 240, 1, 1]               0
SqueezeExcitation-73          [-1, 240, 28, 28]               0
           Conv2d-74           [-1, 40, 28, 28]           9,600
      BatchNorm2d-75           [-1, 40, 28, 28]              80
  StochasticDepth-76           [-1, 40, 28, 28]               0
           MBConv-77           [-1, 40, 28, 28]               0
           Conv2d-78          [-1, 240, 28, 28]           9,600
      BatchNorm2d-79          [-1, 240, 28, 28]             480
             SiLU-80          [-1, 240, 28, 28]               0
           Conv2d-81          [-1, 240, 14, 14]           2,160
      BatchNorm2d-82          [-1, 240, 14, 14]             480
             SiLU-83          [-1, 240, 14, 14]               0
AdaptiveAvgPool2d-84            [-1, 240, 1, 1]               0
           Conv2d-85             [-1, 10, 1, 1]           2,410
             SiLU-86             [-1, 10, 1, 1]               0
           Conv2d-87            [-1, 240, 1, 1]           2,640
          Sigmoid-88            [-1, 240, 1, 1]               0
SqueezeExcitation-89          [-1, 240, 14, 14]               0
           Conv2d-90           [-1, 80, 14, 14]          19,200
      BatchNorm2d-91           [-1, 80, 14, 14]             160
           MBConv-92           [-1, 80, 14, 14]               0
           Conv2d-93          [-1, 480, 14, 14]          38,400
      BatchNorm2d-94          [-1, 480, 14, 14]             960
             SiLU-95          [-1, 480, 14, 14]               0
           Conv2d-96          [-1, 480, 14, 14]           4,320
      BatchNorm2d-97          [-1, 480, 14, 14]             960
             SiLU-98          [-1, 480, 14, 14]               0
AdaptiveAvgPool2d-99            [-1, 480, 1, 1]               0
          Conv2d-100             [-1, 20, 1, 1]           9,620
            SiLU-101             [-1, 20, 1, 1]               0
          Conv2d-102            [-1, 480, 1, 1]          10,080
         Sigmoid-103            [-1, 480, 1, 1]               0
SqueezeExcitation-104          [-1, 480, 14, 14]               0
          Conv2d-105           [-1, 80, 14, 14]          38,400
     BatchNorm2d-106           [-1, 80, 14, 14]             160
 StochasticDepth-107           [-1, 80, 14, 14]               0
          MBConv-108           [-1, 80, 14, 14]               0
          Conv2d-109          [-1, 480, 14, 14]          38,400
     BatchNorm2d-110          [-1, 480, 14, 14]             960
            SiLU-111          [-1, 480, 14, 14]               0
          Conv2d-112          [-1, 480, 14, 14]           4,320
     BatchNorm2d-113          [-1, 480, 14, 14]             960
            SiLU-114          [-1, 480, 14, 14]               0
AdaptiveAvgPool2d-115            [-1, 480, 1, 1]               0
          Conv2d-116             [-1, 20, 1, 1]           9,620
            SiLU-117             [-1, 20, 1, 1]               0
          Conv2d-118            [-1, 480, 1, 1]          10,080
         Sigmoid-119            [-1, 480, 1, 1]               0
SqueezeExcitation-120          [-1, 480, 14, 14]               0
          Conv2d-121           [-1, 80, 14, 14]          38,400
     BatchNorm2d-122           [-1, 80, 14, 14]             160
 StochasticDepth-123           [-1, 80, 14, 14]               0
          MBConv-124           [-1, 80, 14, 14]               0
          Conv2d-125          [-1, 480, 14, 14]          38,400
     BatchNorm2d-126          [-1, 480, 14, 14]             960
            SiLU-127          [-1, 480, 14, 14]               0
          Conv2d-128          [-1, 480, 14, 14]          12,000
     BatchNorm2d-129          [-1, 480, 14, 14]             960
            SiLU-130          [-1, 480, 14, 14]               0
AdaptiveAvgPool2d-131            [-1, 480, 1, 1]               0
          Conv2d-132             [-1, 20, 1, 1]           9,620
            SiLU-133             [-1, 20, 1, 1]               0
          Conv2d-134            [-1, 480, 1, 1]          10,080
         Sigmoid-135            [-1, 480, 1, 1]               0
SqueezeExcitation-136          [-1, 480, 14, 14]               0
          Conv2d-137          [-1, 112, 14, 14]          53,760
     BatchNorm2d-138          [-1, 112, 14, 14]             224
          MBConv-139          [-1, 112, 14, 14]               0
          Conv2d-140          [-1, 672, 14, 14]          75,264
     BatchNorm2d-141          [-1, 672, 14, 14]           1,344
            SiLU-142          [-1, 672, 14, 14]               0
          Conv2d-143          [-1, 672, 14, 14]          16,800
     BatchNorm2d-144          [-1, 672, 14, 14]           1,344
            SiLU-145          [-1, 672, 14, 14]               0
AdaptiveAvgPool2d-146            [-1, 672, 1, 1]               0
          Conv2d-147             [-1, 28, 1, 1]          18,844
            SiLU-148             [-1, 28, 1, 1]               0
          Conv2d-149            [-1, 672, 1, 1]          19,488
         Sigmoid-150            [-1, 672, 1, 1]               0
SqueezeExcitation-151          [-1, 672, 14, 14]               0
          Conv2d-152          [-1, 112, 14, 14]          75,264
     BatchNorm2d-153          [-1, 112, 14, 14]             224
 StochasticDepth-154          [-1, 112, 14, 14]               0
          MBConv-155          [-1, 112, 14, 14]               0
          Conv2d-156          [-1, 672, 14, 14]          75,264
     BatchNorm2d-157          [-1, 672, 14, 14]           1,344
            SiLU-158          [-1, 672, 14, 14]               0
          Conv2d-159          [-1, 672, 14, 14]          16,800
     BatchNorm2d-160          [-1, 672, 14, 14]           1,344
            SiLU-161          [-1, 672, 14, 14]               0
AdaptiveAvgPool2d-162            [-1, 672, 1, 1]               0
          Conv2d-163             [-1, 28, 1, 1]          18,844
            SiLU-164             [-1, 28, 1, 1]               0
          Conv2d-165            [-1, 672, 1, 1]          19,488
         Sigmoid-166            [-1, 672, 1, 1]               0
SqueezeExcitation-167          [-1, 672, 14, 14]               0
          Conv2d-168          [-1, 112, 14, 14]          75,264
     BatchNorm2d-169          [-1, 112, 14, 14]             224
 StochasticDepth-170          [-1, 112, 14, 14]               0
          MBConv-171          [-1, 112, 14, 14]               0
          Conv2d-172          [-1, 672, 14, 14]          75,264
     BatchNorm2d-173          [-1, 672, 14, 14]           1,344
            SiLU-174          [-1, 672, 14, 14]               0
          Conv2d-175            [-1, 672, 7, 7]          16,800
     BatchNorm2d-176            [-1, 672, 7, 7]           1,344
            SiLU-177            [-1, 672, 7, 7]               0
AdaptiveAvgPool2d-178            [-1, 672, 1, 1]               0
          Conv2d-179             [-1, 28, 1, 1]          18,844
            SiLU-180             [-1, 28, 1, 1]               0
          Conv2d-181            [-1, 672, 1, 1]          19,488
         Sigmoid-182            [-1, 672, 1, 1]               0
SqueezeExcitation-183            [-1, 672, 7, 7]               0
          Conv2d-184            [-1, 192, 7, 7]         129,024
     BatchNorm2d-185            [-1, 192, 7, 7]             384
          MBConv-186            [-1, 192, 7, 7]               0
          Conv2d-187           [-1, 1152, 7, 7]         221,184
     BatchNorm2d-188           [-1, 1152, 7, 7]           2,304
            SiLU-189           [-1, 1152, 7, 7]               0
          Conv2d-190           [-1, 1152, 7, 7]          28,800
     BatchNorm2d-191           [-1, 1152, 7, 7]           2,304
            SiLU-192           [-1, 1152, 7, 7]               0
AdaptiveAvgPool2d-193           [-1, 1152, 1, 1]               0
          Conv2d-194             [-1, 48, 1, 1]          55,344
            SiLU-195             [-1, 48, 1, 1]               0
          Conv2d-196           [-1, 1152, 1, 1]          56,448
         Sigmoid-197           [-1, 1152, 1, 1]               0
SqueezeExcitation-198           [-1, 1152, 7, 7]               0
          Conv2d-199            [-1, 192, 7, 7]         221,184
     BatchNorm2d-200            [-1, 192, 7, 7]             384
 StochasticDepth-201            [-1, 192, 7, 7]               0
          MBConv-202            [-1, 192, 7, 7]               0
          Conv2d-203           [-1, 1152, 7, 7]         221,184
     BatchNorm2d-204           [-1, 1152, 7, 7]           2,304
            SiLU-205           [-1, 1152, 7, 7]               0
          Conv2d-206           [-1, 1152, 7, 7]          28,800
     BatchNorm2d-207           [-1, 1152, 7, 7]           2,304
            SiLU-208           [-1, 1152, 7, 7]               0
AdaptiveAvgPool2d-209           [-1, 1152, 1, 1]               0
          Conv2d-210             [-1, 48, 1, 1]          55,344
            SiLU-211             [-1, 48, 1, 1]               0
          Conv2d-212           [-1, 1152, 1, 1]          56,448
         Sigmoid-213           [-1, 1152, 1, 1]               0
SqueezeExcitation-214           [-1, 1152, 7, 7]               0
          Conv2d-215            [-1, 192, 7, 7]         221,184
     BatchNorm2d-216            [-1, 192, 7, 7]             384
 StochasticDepth-217            [-1, 192, 7, 7]               0
          MBConv-218            [-1, 192, 7, 7]               0
          Conv2d-219           [-1, 1152, 7, 7]         221,184
     BatchNorm2d-220           [-1, 1152, 7, 7]           2,304
            SiLU-221           [-1, 1152, 7, 7]               0
          Conv2d-222           [-1, 1152, 7, 7]          28,800
     BatchNorm2d-223           [-1, 1152, 7, 7]           2,304
            SiLU-224           [-1, 1152, 7, 7]               0
AdaptiveAvgPool2d-225           [-1, 1152, 1, 1]               0
          Conv2d-226             [-1, 48, 1, 1]          55,344
            SiLU-227             [-1, 48, 1, 1]               0
          Conv2d-228           [-1, 1152, 1, 1]          56,448
         Sigmoid-229           [-1, 1152, 1, 1]               0
SqueezeExcitation-230           [-1, 1152, 7, 7]               0
          Conv2d-231            [-1, 192, 7, 7]         221,184
     BatchNorm2d-232            [-1, 192, 7, 7]             384
 StochasticDepth-233            [-1, 192, 7, 7]               0
          MBConv-234            [-1, 192, 7, 7]               0
          Conv2d-235           [-1, 1152, 7, 7]         221,184
     BatchNorm2d-236           [-1, 1152, 7, 7]           2,304
            SiLU-237           [-1, 1152, 7, 7]               0
          Conv2d-238           [-1, 1152, 7, 7]          10,368
     BatchNorm2d-239           [-1, 1152, 7, 7]           2,304
            SiLU-240           [-1, 1152, 7, 7]               0
AdaptiveAvgPool2d-241           [-1, 1152, 1, 1]               0
          Conv2d-242             [-1, 48, 1, 1]          55,344
            SiLU-243             [-1, 48, 1, 1]               0
          Conv2d-244           [-1, 1152, 1, 1]          56,448
         Sigmoid-245           [-1, 1152, 1, 1]               0
SqueezeExcitation-246           [-1, 1152, 7, 7]               0
          Conv2d-247            [-1, 320, 7, 7]         368,640
     BatchNorm2d-248            [-1, 320, 7, 7]             640
          MBConv-249            [-1, 320, 7, 7]               0
          Conv2d-250           [-1, 1280, 7, 7]         409,600
     BatchNorm2d-251           [-1, 1280, 7, 7]           2,560
            SiLU-252           [-1, 1280, 7, 7]               0
AdaptiveAvgPool2d-253           [-1, 1280, 1, 1]               0
         Dropout-254                 [-1, 1280]               0
          Linear-255                 [-1, 1000]       1,281,000
================================================================
Total params: 5,288,548
Trainable params: 5,288,548
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 173.65
Params size (MB): 20.17
Estimated Total Size (MB): 194.40
----------------------------------------------------------------
None


!pip install -q timm

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.2/2.2 MB 12.3 MB/s eta 0:00:00


import torch
import random
import numpy as np

# Seed the Python, NumPy and PyTorch random number generators with the same
# fixed value so that random draws are repeatable from run to run
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


import timm

# Total number of model names that timm reports
len(timm.list_models())

1017


# Model names matching the "efficientnet*" pattern, restricted to entries
# that come with pretrained weights
timm.list_models("efficientnet*", pretrained=True)

['efficientnet_b0.ra_in1k',
 'efficientnet_b1.ft_in1k',
 'efficientnet_b1_pruned.in1k',
 'efficientnet_b2.ra_in1k',
 'efficientnet_b2_pruned.in1k',
 'efficientnet_b3.ra2_in1k',
 'efficientnet_b3_pruned.in1k',
 'efficientnet_b4.ra2_in1k',
 'efficientnet_b5.sw_in12k',
 'efficientnet_b5.sw_in12k_ft_in1k',
 'efficientnet_el.ra_in1k',
 'efficientnet_el_pruned.in1k',
 'efficientnet_em.ra2_in1k',
 'efficientnet_es.ra_in1k',
 'efficientnet_es_pruned.in1k',
 'efficientnet_lite0.ra_in1k',
 'efficientnetv2_rw_m.agc_in1k',
 'efficientnetv2_rw_s.ra2_in1k',
 'efficientnetv2_rw_t.ra2_in1k']


# Pick one of the pretrained EfficientNet variants from the list above
model_name = "efficientnet_lite0.ra_in1k"
# Build the architecture and load its pretrained weights
pretrained_model = timm.create_model(model_name, pretrained=True)

# Show the module hierarchy of the loaded model
print(pretrained_model)

/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: 
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  warnings.warn(

model.safetensors:   0%|          | 0.00/18.8M [00:00<?, ?B/s]

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): ReLU6(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
          16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (1): Sequential(
      (0): InvertedResidual(
        (conv_pw): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False)
        (bn2): BatchNormAct2d(
          96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (1): InvertedResidual(
        (conv_pw): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)
        (bn2): BatchNormAct2d(
          144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (2): Sequential(
      (0): InvertedResidual(
        (conv_pw): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(144, 144, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=144, bias=False)
        (bn2): BatchNormAct2d(
          144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(144, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (1): InvertedResidual(
        (conv_pw): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(240, 240, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=240, bias=False)
        (bn2): BatchNormAct2d(
          240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(240, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (3): Sequential(
      (0): InvertedResidual(
        (conv_pw): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(240, 240, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=240, bias=False)
        (bn2): BatchNormAct2d(
          240, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (1): InvertedResidual(
        (conv_pw): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (bn2): BatchNormAct2d(
          480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (2): InvertedResidual(
        (conv_pw): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (bn2): BatchNormAct2d(
          480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (4): Sequential(
      (0): InvertedResidual(
        (conv_pw): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(480, 480, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=480, bias=False)
        (bn2): BatchNormAct2d(
          480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (1): InvertedResidual(
        (conv_pw): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=672, bias=False)
        (bn2): BatchNormAct2d(
          672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (2): InvertedResidual(
        (conv_pw): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=672, bias=False)
        (bn2): BatchNormAct2d(
          672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          112, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (5): Sequential(
      (0): InvertedResidual(
        (conv_pw): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(672, 672, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=672, bias=False)
        (bn2): BatchNormAct2d(
          672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(672, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (1): InvertedResidual(
        (conv_pw): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
        (bn2): BatchNormAct2d(
          1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (2): InvertedResidual(
        (conv_pw): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
        (bn2): BatchNormAct2d(
          1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (3): InvertedResidual(
        (conv_pw): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
        (bn2): BatchNormAct2d(
          1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (6): Sequential(
      (0): InvertedResidual(
        (conv_pw): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNormAct2d(
          1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(1152, 1152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1152, bias=False)
        (bn2): BatchNormAct2d(
          1152, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(1152, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNormAct2d(
          320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
  )
  (conv_head): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn2): BatchNormAct2d(
    1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): ReLU6(inplace=True)
  )
  (global_pool): SelectAdaptivePool2d(pool_type=avg, flatten=Flatten(start_dim=1, end_dim=-1))
  (classifier): Linear(in_features=1280, out_features=1000, bias=True)
)


# Re-create the model with num_classes=10 so that timm builds the
# classification head with 10 outputs instead of the default 1000
pretrained_model = timm.create_model(model_name, pretrained=True, num_classes=10)
# Inspect the resulting head
pretrained_model.classifier

Linear(in_features=1280, out_features=10, bias=True)


# `nn` is used from this cell on, but no import of it is visible in this
# section; import it here so the section also works when run on its own
from torch import nn

# Alternative to passing num_classes: overwrite the classification head by
# hand with a new linear layer mapping the 1280 pooled features to 10 classes
pretrained_model.classifier = nn.Linear(1280, 10)


# Collect the model's direct child modules into a plain Python list
layers = list(pretrained_model.children())
# Index 3 is the head convolution (conv_head) that follows the stack of blocks
print("3 layer:", layers[3])
print("layers:", len(layers))

3 layer: Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
layers: 7


# Build a replacement for the head convolution: same 320 input channels,
# but 512 output channels instead of the original 1280
new_layer = nn.Conv2d(in_channels=320, out_channels=512, kernel_size=(1, 1), bias=False)
# Swap it into the list of children; pretrained_model itself is not modified
layers[3] = new_layer

# Chain the child modules, in list order, into a new sequential model
modified_model = nn.Sequential(*layers)
# NOTE(review): the bn2 and classifier children that follow were built for
# 1280 channels/features, so a forward pass through modified_model presumably
# fails with a size mismatch unless they are replaced as well - TODO confirm

print("3 layer:", layers[3])

3 layer: Conv2d(320, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)


# Read the classifier's input width from the model instead of hard-coding it
num_in_features = pretrained_model.get_classifier().in_features
num_in_features

1280


# Replace the single-layer head with a small MLP:
# num_in_features -> 512 (ReLU, batch norm, dropout with p=0.4) -> 10 outputs
pretrained_model.classifier = nn.Sequential(
    nn.Linear(in_features=num_in_features, out_features=512, bias=False),
    nn.ReLU(),
    nn.BatchNorm1d(512),
    nn.Dropout(0.4),
    nn.Linear(in_features=512, out_features=10, bias=False),
)


# Display the new head to check its structure
pretrained_model.classifier

Sequential(
  (0): Linear(in_features=1280, out_features=512, bias=False)
  (1): ReLU()
  (2): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.4, inplace=False)
  (4): Linear(in_features=512, out_features=10, bias=False)
)


# in_chans=8 requests a model that accepts 8-channel input
# (the model printed earlier starts with a 3-channel conv_stem)
pretrained_model_2 = timm.create_model(
    model_name, pretrained=True, num_classes=10, in_chans=8
)
# Random batch holding one 8-channel 224x224 input, used to check the output shape
x = torch.rand(1, 8, 224, 224)
pretrained_model_2(x).shape

torch.Size([1, 10])


from torchvision.models.feature_extraction import get_graph_node_names
from warnings import simplefilter

# Ignore UserWarning messages from here on; presumably this hides warnings
# raised while the model is traced - TODO confirm
simplefilter("ignore", UserWarning)

# get_graph_node_names returns (train-mode names, eval-mode names);
# index 0 selects the train-mode list
get_graph_node_names(pretrained_model)[0]

['x',
 'conv_stem',
 'bn1.getattr',
 'bn1.eq',
 'bn1.getattr_1',
 'bn1._assert',
 'bn1.bn1_weight',
 'bn1.bn1_bias',
 'bn1.batch_norm',
 'bn1.drop',
 'bn1.act',
 'blocks.0.0.conv_dw',
 'blocks.0.0.bn1.getattr',
 'blocks.0.0.bn1.eq',
 'blocks.0.0.bn1.getattr_1',
 'blocks.0.0.bn1._assert',
 'blocks.0.0.bn1.blocks_0_0_bn1_weight',
 'blocks.0.0.bn1.blocks_0_0_bn1_bias',
 'blocks.0.0.bn1.batch_norm',
 'blocks.0.0.bn1.drop',
 'blocks.0.0.bn1.act',
 'blocks.0.0.se',
 'blocks.0.0.conv_pw',
 'blocks.0.0.bn2.getattr',
 'blocks.0.0.bn2.eq',
 'blocks.0.0.bn2.getattr_1',
 'blocks.0.0.bn2._assert',
 'blocks.0.0.bn2.blocks_0_0_bn2_weight',
 'blocks.0.0.bn2.blocks_0_0_bn2_bias',
 'blocks.0.0.bn2.batch_norm',
 'blocks.0.0.bn2.drop',
 'blocks.0.0.bn2.act',
 'blocks.1.0.conv_pw',
 'blocks.1.0.bn1.getattr',
 'blocks.1.0.bn1.eq',
 'blocks.1.0.bn1.getattr_1',
 'blocks.1.0.bn1._assert',
 'blocks.1.0.bn1.blocks_1_0_bn1_weight',
 'blocks.1.0.bn1.blocks_1_0_bn1_bias',
 'blocks.1.0.bn1.batch_norm',
 'blocks.1.0.bn1.drop',
 'blocks.1.0.bn1.act',
 'blocks.1.0.conv_dw',
 'blocks.1.0.bn2.getattr',
 'blocks.1.0.bn2.eq',
 'blocks.1.0.bn2.getattr_1',
 'blocks.1.0.bn2._assert',
 'blocks.1.0.bn2.blocks_1_0_bn2_weight',
 'blocks.1.0.bn2.blocks_1_0_bn2_bias',
 'blocks.1.0.bn2.batch_norm',
 'blocks.1.0.bn2.drop',
 'blocks.1.0.bn2.act',
 'blocks.1.0.se',
 'blocks.1.0.conv_pwl',
 'blocks.1.0.bn3.getattr',
 'blocks.1.0.bn3.eq',
 'blocks.1.0.bn3.getattr_1',
 'blocks.1.0.bn3._assert',
 'blocks.1.0.bn3.blocks_1_0_bn3_weight',
 'blocks.1.0.bn3.blocks_1_0_bn3_bias',
 'blocks.1.0.bn3.batch_norm',
 'blocks.1.0.bn3.drop',
 'blocks.1.0.bn3.act',
 'blocks.1.1.conv_pw',
 'blocks.1.1.bn1.getattr',
 'blocks.1.1.bn1.eq',
 'blocks.1.1.bn1.getattr_1',
 'blocks.1.1.bn1._assert',
 'blocks.1.1.bn1.blocks_1_1_bn1_weight',
 'blocks.1.1.bn1.blocks_1_1_bn1_bias',
 'blocks.1.1.bn1.batch_norm',
 'blocks.1.1.bn1.drop',
 'blocks.1.1.bn1.act',
 'blocks.1.1.conv_dw',
 'blocks.1.1.bn2.getattr',
 'blocks.1.1.bn2.eq',
 'blocks.1.1.bn2.getattr_1',
 'blocks.1.1.bn2._assert',
 'blocks.1.1.bn2.blocks_1_1_bn2_weight',
 'blocks.1.1.bn2.blocks_1_1_bn2_bias',
 'blocks.1.1.bn2.batch_norm',
 'blocks.1.1.bn2.drop',
 'blocks.1.1.bn2.act',
 'blocks.1.1.se',
 'blocks.1.1.conv_pwl',
 'blocks.1.1.bn3.getattr',
 'blocks.1.1.bn3.eq',
 'blocks.1.1.bn3.getattr_1',
 'blocks.1.1.bn3._assert',
 'blocks.1.1.bn3.blocks_1_1_bn3_weight',
 'blocks.1.1.bn3.blocks_1_1_bn3_bias',
 'blocks.1.1.bn3.batch_norm',
 'blocks.1.1.bn3.drop',
 'blocks.1.1.bn3.act',
 'blocks.1.1.drop_path',
 'blocks.1.1.add',
 'blocks.2.0.conv_pw',
 'blocks.2.0.bn1.getattr',
 'blocks.2.0.bn1.eq',
 'blocks.2.0.bn1.getattr_1',
 'blocks.2.0.bn1._assert',
 'blocks.2.0.bn1.blocks_2_0_bn1_weight',
 'blocks.2.0.bn1.blocks_2_0_bn1_bias',
 'blocks.2.0.bn1.batch_norm',
 'blocks.2.0.bn1.drop',
 'blocks.2.0.bn1.act',
 'blocks.2.0.conv_dw',
 'blocks.2.0.bn2.getattr',
 'blocks.2.0.bn2.eq',
 'blocks.2.0.bn2.getattr_1',
 'blocks.2.0.bn2._assert',
 'blocks.2.0.bn2.blocks_2_0_bn2_weight',
 'blocks.2.0.bn2.blocks_2_0_bn2_bias',
 'blocks.2.0.bn2.batch_norm',
 'blocks.2.0.bn2.drop',
 'blocks.2.0.bn2.act',
 'blocks.2.0.se',
 'blocks.2.0.conv_pwl',
 'blocks.2.0.bn3.getattr',
 'blocks.2.0.bn3.eq',
 'blocks.2.0.bn3.getattr_1',
 'blocks.2.0.bn3._assert',
 'blocks.2.0.bn3.blocks_2_0_bn3_weight',
 'blocks.2.0.bn3.blocks_2_0_bn3_bias',
 'blocks.2.0.bn3.batch_norm',
 'blocks.2.0.bn3.drop',
 'blocks.2.0.bn3.act',
 'blocks.2.1.conv_pw',
 'blocks.2.1.bn1.getattr',
 'blocks.2.1.bn1.eq',
 'blocks.2.1.bn1.getattr_1',
 'blocks.2.1.bn1._assert',
 'blocks.2.1.bn1.blocks_2_1_bn1_weight',
 'blocks.2.1.bn1.blocks_2_1_bn1_bias',
 'blocks.2.1.bn1.batch_norm',
 'blocks.2.1.bn1.drop',
 'blocks.2.1.bn1.act',
 'blocks.2.1.conv_dw',
 'blocks.2.1.bn2.getattr',
 'blocks.2.1.bn2.eq',
 'blocks.2.1.bn2.getattr_1',
 'blocks.2.1.bn2._assert',
 'blocks.2.1.bn2.blocks_2_1_bn2_weight',
 'blocks.2.1.bn2.blocks_2_1_bn2_bias',
 'blocks.2.1.bn2.batch_norm',
 'blocks.2.1.bn2.drop',
 'blocks.2.1.bn2.act',
 'blocks.2.1.se',
 'blocks.2.1.conv_pwl',
 'blocks.2.1.bn3.getattr',
 'blocks.2.1.bn3.eq',
 'blocks.2.1.bn3.getattr_1',
 'blocks.2.1.bn3._assert',
 'blocks.2.1.bn3.blocks_2_1_bn3_weight',
 'blocks.2.1.bn3.blocks_2_1_bn3_bias',
 'blocks.2.1.bn3.batch_norm',
 'blocks.2.1.bn3.drop',
 'blocks.2.1.bn3.act',
 'blocks.2.1.drop_path',
 'blocks.2.1.add',
 'blocks.3.0.conv_pw',
 'blocks.3.0.bn1.getattr',
 'blocks.3.0.bn1.eq',
 'blocks.3.0.bn1.getattr_1',
 'blocks.3.0.bn1._assert',
 'blocks.3.0.bn1.blocks_3_0_bn1_weight',
 'blocks.3.0.bn1.blocks_3_0_bn1_bias',
 'blocks.3.0.bn1.batch_norm',
 'blocks.3.0.bn1.drop',
 'blocks.3.0.bn1.act',
 'blocks.3.0.conv_dw',
 'blocks.3.0.bn2.getattr',
 'blocks.3.0.bn2.eq',
 'blocks.3.0.bn2.getattr_1',
 'blocks.3.0.bn2._assert',
 'blocks.3.0.bn2.blocks_3_0_bn2_weight',
 'blocks.3.0.bn2.blocks_3_0_bn2_bias',
 'blocks.3.0.bn2.batch_norm',
 'blocks.3.0.bn2.drop',
 'blocks.3.0.bn2.act',
 'blocks.3.0.se',
 'blocks.3.0.conv_pwl',
 'blocks.3.0.bn3.getattr',
 'blocks.3.0.bn3.eq',
 'blocks.3.0.bn3.getattr_1',
 'blocks.3.0.bn3._assert',
 'blocks.3.0.bn3.blocks_3_0_bn3_weight',
 'blocks.3.0.bn3.blocks_3_0_bn3_bias',
 'blocks.3.0.bn3.batch_norm',
 'blocks.3.0.bn3.drop',
 'blocks.3.0.bn3.act',
 'blocks.3.1.conv_pw',
 'blocks.3.1.bn1.getattr',
 'blocks.3.1.bn1.eq',
 'blocks.3.1.bn1.getattr_1',
 'blocks.3.1.bn1._assert',
 'blocks.3.1.bn1.blocks_3_1_bn1_weight',
 'blocks.3.1.bn1.blocks_3_1_bn1_bias',
 'blocks.3.1.bn1.batch_norm',
 'blocks.3.1.bn1.drop',
 'blocks.3.1.bn1.act',
 'blocks.3.1.conv_dw',
 'blocks.3.1.bn2.getattr',
 'blocks.3.1.bn2.eq',
 'blocks.3.1.bn2.getattr_1',
 'blocks.3.1.bn2._assert',
 'blocks.3.1.bn2.blocks_3_1_bn2_weight',
 'blocks.3.1.bn2.blocks_3_1_bn2_bias',
 'blocks.3.1.bn2.batch_norm',
 'blocks.3.1.bn2.drop',
 'blocks.3.1.bn2.act',
 'blocks.3.1.se',
 'blocks.3.1.conv_pwl',
 'blocks.3.1.bn3.getattr',
 'blocks.3.1.bn3.eq',
 'blocks.3.1.bn3.getattr_1',
 'blocks.3.1.bn3._assert',
 'blocks.3.1.bn3.blocks_3_1_bn3_weight',
 'blocks.3.1.bn3.blocks_3_1_bn3_bias',
 'blocks.3.1.bn3.batch_norm',
 'blocks.3.1.bn3.drop',
 'blocks.3.1.bn3.act',
 'blocks.3.1.drop_path',
 'blocks.3.1.add',
 'blocks.3.2.conv_pw',
 'blocks.3.2.bn1.getattr',
 'blocks.3.2.bn1.eq',
 'blocks.3.2.bn1.getattr_1',
 'blocks.3.2.bn1._assert',
 'blocks.3.2.bn1.blocks_3_2_bn1_weight',
 'blocks.3.2.bn1.blocks_3_2_bn1_bias',
 'blocks.3.2.bn1.batch_norm',
 'blocks.3.2.bn1.drop',
 'blocks.3.2.bn1.act',
 'blocks.3.2.conv_dw',
 'blocks.3.2.bn2.getattr',
 'blocks.3.2.bn2.eq',
 'blocks.3.2.bn2.getattr_1',
 'blocks.3.2.bn2._assert',
 'blocks.3.2.bn2.blocks_3_2_bn2_weight',
 'blocks.3.2.bn2.blocks_3_2_bn2_bias',
 'blocks.3.2.bn2.batch_norm',
 'blocks.3.2.bn2.drop',
 'blocks.3.2.bn2.act',
 'blocks.3.2.se',
 'blocks.3.2.conv_pwl',
 'blocks.3.2.bn3.getattr',
 'blocks.3.2.bn3.eq',
 'blocks.3.2.bn3.getattr_1',
 'blocks.3.2.bn3._assert',
 'blocks.3.2.bn3.blocks_3_2_bn3_weight',
 'blocks.3.2.bn3.blocks_3_2_bn3_bias',
 'blocks.3.2.bn3.batch_norm',
 'blocks.3.2.bn3.drop',
 'blocks.3.2.bn3.act',
 'blocks.3.2.drop_path',
 'blocks.3.2.add',
 'blocks.4.0.conv_pw',
 'blocks.4.0.bn1.getattr',
 'blocks.4.0.bn1.eq',
 'blocks.4.0.bn1.getattr_1',
 'blocks.4.0.bn1._assert',
 'blocks.4.0.bn1.blocks_4_0_bn1_weight',
 'blocks.4.0.bn1.blocks_4_0_bn1_bias',
 'blocks.4.0.bn1.batch_norm',
 'blocks.4.0.bn1.drop',
 'blocks.4.0.bn1.act',
 'blocks.4.0.conv_dw',
 'blocks.4.0.bn2.getattr',
 'blocks.4.0.bn2.eq',
 'blocks.4.0.bn2.getattr_1',
 'blocks.4.0.bn2._assert',
 'blocks.4.0.bn2.blocks_4_0_bn2_weight',
 'blocks.4.0.bn2.blocks_4_0_bn2_bias',
 'blocks.4.0.bn2.batch_norm',
 'blocks.4.0.bn2.drop',
 'blocks.4.0.bn2.act',
 'blocks.4.0.se',
 'blocks.4.0.conv_pwl',
 'blocks.4.0.bn3.getattr',
 'blocks.4.0.bn3.eq',
 'blocks.4.0.bn3.getattr_1',
 'blocks.4.0.bn3._assert',
 'blocks.4.0.bn3.blocks_4_0_bn3_weight',
 'blocks.4.0.bn3.blocks_4_0_bn3_bias',
 'blocks.4.0.bn3.batch_norm',
 'blocks.4.0.bn3.drop',
 'blocks.4.0.bn3.act',
 'blocks.4.1.conv_pw',
 'blocks.4.1.bn1.getattr',
 'blocks.4.1.bn1.eq',
 'blocks.4.1.bn1.getattr_1',
 'blocks.4.1.bn1._assert',
 'blocks.4.1.bn1.blocks_4_1_bn1_weight',
 'blocks.4.1.bn1.blocks_4_1_bn1_bias',
 'blocks.4.1.bn1.batch_norm',
 'blocks.4.1.bn1.drop',
 'blocks.4.1.bn1.act',
 'blocks.4.1.conv_dw',
 'blocks.4.1.bn2.getattr',
 'blocks.4.1.bn2.eq',
 'blocks.4.1.bn2.getattr_1',
 'blocks.4.1.bn2._assert',
 'blocks.4.1.bn2.blocks_4_1_bn2_weight',
 'blocks.4.1.bn2.blocks_4_1_bn2_bias',
 'blocks.4.1.bn2.batch_norm',
 'blocks.4.1.bn2.drop',
 'blocks.4.1.bn2.act',
 'blocks.4.1.se',
 'blocks.4.1.conv_pwl',
 'blocks.4.1.bn3.getattr',
 'blocks.4.1.bn3.eq',
 'blocks.4.1.bn3.getattr_1',
 'blocks.4.1.bn3._assert',
 'blocks.4.1.bn3.blocks_4_1_bn3_weight',
 'blocks.4.1.bn3.blocks_4_1_bn3_bias',
 'blocks.4.1.bn3.batch_norm',
 'blocks.4.1.bn3.drop',
 'blocks.4.1.bn3.act',
 'blocks.4.1.drop_path',
 'blocks.4.1.add',
 'blocks.4.2.conv_pw',
 'blocks.4.2.bn1.getattr',
 'blocks.4.2.bn1.eq',
 'blocks.4.2.bn1.getattr_1',
 'blocks.4.2.bn1._assert',
 'blocks.4.2.bn1.blocks_4_2_bn1_weight',
 'blocks.4.2.bn1.blocks_4_2_bn1_bias',
 'blocks.4.2.bn1.batch_norm',
 'blocks.4.2.bn1.drop',
 'blocks.4.2.bn1.act',
 'blocks.4.2.conv_dw',
 'blocks.4.2.bn2.getattr',
 'blocks.4.2.bn2.eq',
 'blocks.4.2.bn2.getattr_1',
 'blocks.4.2.bn2._assert',
 'blocks.4.2.bn2.blocks_4_2_bn2_weight',
 'blocks.4.2.bn2.blocks_4_2_bn2_bias',
 'blocks.4.2.bn2.batch_norm',
 'blocks.4.2.bn2.drop',
 'blocks.4.2.bn2.act',
 'blocks.4.2.se',
 'blocks.4.2.conv_pwl',
 'blocks.4.2.bn3.getattr',
 'blocks.4.2.bn3.eq',
 'blocks.4.2.bn3.getattr_1',
 'blocks.4.2.bn3._assert',
 'blocks.4.2.bn3.blocks_4_2_bn3_weight',
 'blocks.4.2.bn3.blocks_4_2_bn3_bias',
 'blocks.4.2.bn3.batch_norm',
 'blocks.4.2.bn3.drop',
 'blocks.4.2.bn3.act',
 'blocks.4.2.drop_path',
 'blocks.4.2.add',
 'blocks.5.0.conv_pw',
 'blocks.5.0.bn1.getattr',
 'blocks.5.0.bn1.eq',
 'blocks.5.0.bn1.getattr_1',
 'blocks.5.0.bn1._assert',
 'blocks.5.0.bn1.blocks_5_0_bn1_weight',
 'blocks.5.0.bn1.blocks_5_0_bn1_bias',
 'blocks.5.0.bn1.batch_norm',
 'blocks.5.0.bn1.drop',
 'blocks.5.0.bn1.act',
 'blocks.5.0.conv_dw',
 'blocks.5.0.bn2.getattr',
 'blocks.5.0.bn2.eq',
 'blocks.5.0.bn2.getattr_1',
 'blocks.5.0.bn2._assert',
 'blocks.5.0.bn2.blocks_5_0_bn2_weight',
 'blocks.5.0.bn2.blocks_5_0_bn2_bias',
 'blocks.5.0.bn2.batch_norm',
 'blocks.5.0.bn2.drop',
 'blocks.5.0.bn2.act',
 'blocks.5.0.se',
 'blocks.5.0.conv_pwl',
 'blocks.5.0.bn3.getattr',
 'blocks.5.0.bn3.eq',
 'blocks.5.0.bn3.getattr_1',
 'blocks.5.0.bn3._assert',
 'blocks.5.0.bn3.blocks_5_0_bn3_weight',
 'blocks.5.0.bn3.blocks_5_0_bn3_bias',
 'blocks.5.0.bn3.batch_norm',
 'blocks.5.0.bn3.drop',
 'blocks.5.0.bn3.act',
 'blocks.5.1.conv_pw',
 'blocks.5.1.bn1.getattr',
 'blocks.5.1.bn1.eq',
 'blocks.5.1.bn1.getattr_1',
 'blocks.5.1.bn1._assert',
 'blocks.5.1.bn1.blocks_5_1_bn1_weight',
 'blocks.5.1.bn1.blocks_5_1_bn1_bias',
 'blocks.5.1.bn1.batch_norm',
 'blocks.5.1.bn1.drop',
 'blocks.5.1.bn1.act',
 'blocks.5.1.conv_dw',
 'blocks.5.1.bn2.getattr',
 'blocks.5.1.bn2.eq',
 'blocks.5.1.bn2.getattr_1',
 'blocks.5.1.bn2._assert',
 'blocks.5.1.bn2.blocks_5_1_bn2_weight',
 'blocks.5.1.bn2.blocks_5_1_bn2_bias',
 'blocks.5.1.bn2.batch_norm',
 'blocks.5.1.bn2.drop',
 'blocks.5.1.bn2.act',
 'blocks.5.1.se',
 'blocks.5.1.conv_pwl',
 'blocks.5.1.bn3.getattr',
 'blocks.5.1.bn3.eq',
 'blocks.5.1.bn3.getattr_1',
 'blocks.5.1.bn3._assert',
 'blocks.5.1.bn3.blocks_5_1_bn3_weight',
 'blocks.5.1.bn3.blocks_5_1_bn3_bias',
 'blocks.5.1.bn3.batch_norm',
 'blocks.5.1.bn3.drop',
 'blocks.5.1.bn3.act',
 'blocks.5.1.drop_path',
 'blocks.5.1.add',
 'blocks.5.2.conv_pw',
 'blocks.5.2.bn1.getattr',
 'blocks.5.2.bn1.eq',
 'blocks.5.2.bn1.getattr_1',
 'blocks.5.2.bn1._assert',
 'blocks.5.2.bn1.blocks_5_2_bn1_weight',
 'blocks.5.2.bn1.blocks_5_2_bn1_bias',
 'blocks.5.2.bn1.batch_norm',
 'blocks.5.2.bn1.drop',
 'blocks.5.2.bn1.act',
 'blocks.5.2.conv_dw',
 'blocks.5.2.bn2.getattr',
 'blocks.5.2.bn2.eq',
 'blocks.5.2.bn2.getattr_1',
 'blocks.5.2.bn2._assert',
 'blocks.5.2.bn2.blocks_5_2_bn2_weight',
 'blocks.5.2.bn2.blocks_5_2_bn2_bias',
 'blocks.5.2.bn2.batch_norm',
 'blocks.5.2.bn2.drop',
 'blocks.5.2.bn2.act',
 'blocks.5.2.se',
 'blocks.5.2.conv_pwl',
 'blocks.5.2.bn3.getattr',
 'blocks.5.2.bn3.eq',
 'blocks.5.2.bn3.getattr_1',
 'blocks.5.2.bn3._assert',
 'blocks.5.2.bn3.blocks_5_2_bn3_weight',
 'blocks.5.2.bn3.blocks_5_2_bn3_bias',
 'blocks.5.2.bn3.batch_norm',
 'blocks.5.2.bn3.drop',
 'blocks.5.2.bn3.act',
 'blocks.5.2.drop_path',
 'blocks.5.2.add',
 'blocks.5.3.conv_pw',
 'blocks.5.3.bn1.getattr',
 'blocks.5.3.bn1.eq',
 'blocks.5.3.bn1.getattr_1',
 'blocks.5.3.bn1._assert',
 'blocks.5.3.bn1.blocks_5_3_bn1_weight',
 'blocks.5.3.bn1.blocks_5_3_bn1_bias',
 'blocks.5.3.bn1.batch_norm',
 'blocks.5.3.bn1.drop',
 'blocks.5.3.bn1.act',
 'blocks.5.3.conv_dw',
 'blocks.5.3.bn2.getattr',
 'blocks.5.3.bn2.eq',
 'blocks.5.3.bn2.getattr_1',
 'blocks.5.3.bn2._assert',
 'blocks.5.3.bn2.blocks_5_3_bn2_weight',
 'blocks.5.3.bn2.blocks_5_3_bn2_bias',
 'blocks.5.3.bn2.batch_norm',
 'blocks.5.3.bn2.drop',
 'blocks.5.3.bn2.act',
 'blocks.5.3.se',
 'blocks.5.3.conv_pwl',
 'blocks.5.3.bn3.getattr',
 'blocks.5.3.bn3.eq',
 'blocks.5.3.bn3.getattr_1',
 'blocks.5.3.bn3._assert',
 'blocks.5.3.bn3.blocks_5_3_bn3_weight',
 'blocks.5.3.bn3.blocks_5_3_bn3_bias',
 'blocks.5.3.bn3.batch_norm',
 'blocks.5.3.bn3.drop',
 'blocks.5.3.bn3.act',
 'blocks.5.3.drop_path',
 'blocks.5.3.add',
 'blocks.6.0.conv_pw',
 'blocks.6.0.bn1.getattr',
 'blocks.6.0.bn1.eq',
 'blocks.6.0.bn1.getattr_1',
 'blocks.6.0.bn1._assert',
 'blocks.6.0.bn1.blocks_6_0_bn1_weight',
 'blocks.6.0.bn1.blocks_6_0_bn1_bias',
 'blocks.6.0.bn1.batch_norm',
 'blocks.6.0.bn1.drop',
 'blocks.6.0.bn1.act',
 'blocks.6.0.conv_dw',
 'blocks.6.0.bn2.getattr',
 'blocks.6.0.bn2.eq',
 'blocks.6.0.bn2.getattr_1',
 'blocks.6.0.bn2._assert',
 'blocks.6.0.bn2.blocks_6_0_bn2_weight',
 'blocks.6.0.bn2.blocks_6_0_bn2_bias',
 'blocks.6.0.bn2.batch_norm',
 'blocks.6.0.bn2.drop',
 'blocks.6.0.bn2.act',
 'blocks.6.0.se',
 'blocks.6.0.conv_pwl',
 'blocks.6.0.bn3.getattr',
 'blocks.6.0.bn3.eq',
 'blocks.6.0.bn3.getattr_1',
 'blocks.6.0.bn3._assert',
 'blocks.6.0.bn3.blocks_6_0_bn3_weight',
 'blocks.6.0.bn3.blocks_6_0_bn3_bias',
 'blocks.6.0.bn3.batch_norm',
 'blocks.6.0.bn3.drop',
 'blocks.6.0.bn3.act',
 'conv_head',
 'bn2.getattr',
 'bn2.eq',
 'bn2.getattr_1',
 'bn2._assert',
 'bn2.bn2_weight',
 'bn2.bn2_bias',
 'bn2.batch_norm',
 'bn2.drop',
 'bn2.act',
 'global_pool.pool',
 'global_pool.flatten',
 'classifier.0',
 'classifier.1',
 'classifier.2',
 'classifier.3',
 'classifier.4']


from torchvision.models.feature_extraction import create_feature_extractor

# return_nodes maps a graph node name to the key used for it in the output dict:
# here the output of node "classifier.0" is exposed as "out".
features = {"classifier.0": "out"}
custom_fe = create_feature_extractor(pretrained_model, return_nodes=features)
# Display the resulting module (notebook cell output)
custom_fe

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): Module(
    (drop): Identity()
    (act): ReLU6(inplace=True)
  )
  (blocks): Module(
    (0): Module(
      (0): Module(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): Identity()
        )
      )
    )
    (1): Module(
      (0): Module(
        (conv_pw): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
      )
      (1): Module(
        (conv_pw): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (2): Module(
      (0): Module(
        (conv_pw): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(144, 144, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=144, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(144, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
      )
      (1): Module(
        (conv_pw): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(240, 240, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=240, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(240, 40, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (3): Module(
      (0): Module(
        (conv_pw): Conv2d(40, 240, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(240, 240, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=240, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(240, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
      )
      (1): Module(
        (conv_pw): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (2): Module(
        (conv_pw): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(480, 480, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=480, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(480, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (4): Module(
      (0): Module(
        (conv_pw): Conv2d(80, 480, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(480, 480, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=480, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(480, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
      )
      (1): Module(
        (conv_pw): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=672, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (2): Module(
        (conv_pw): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(672, 672, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=672, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(672, 112, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (5): Module(
      (0): Module(
        (conv_pw): Conv2d(112, 672, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(672, 672, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2), groups=672, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(672, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
      )
      (1): Module(
        (conv_pw): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (2): Module(
        (conv_pw): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
      (3): Module(
        (conv_pw): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(1152, 1152, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), groups=1152, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(1152, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
        (drop_path): Identity()
      )
    )
    (6): Module(
      (0): Module(
        (conv_pw): Conv2d(192, 1152, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (conv_dw): Conv2d(1152, 1152, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1152, bias=False)
        (bn2): Module(
          (drop): Identity()
          (act): ReLU6(inplace=True)
        )
        (se): Identity()
        (conv_pwl): Conv2d(1152, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): Module(
          (drop): Identity()
          (act): Identity()
        )
      )
    )
  )
  (conv_head): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn2): Module(
    (drop): Identity()
    (act): ReLU6(inplace=True)
  )
  (global_pool): Module(
    (pool): AdaptiveAvgPool2d(output_size=1)
    (flatten): Flatten(start_dim=1, end_dim=-1)
  )
  (classifier): Module(
    (0): Linear(in_features=1280, out_features=512, bias=False)
  )
)


# Random tensor of shape (1, 3, 224, 224) used to probe the extractor's output shape
x = torch.rand(1, 3, 224, 224)
custom_fe(x)["out"].shape

torch.Size([1, 512])


# Build the same architecture through timm with features_only=True
# (the resulting model is called on `x` and iterated over below)
fe_model = timm.create_model(model_name, pretrained=True, features_only=True)


# Show the metadata timm keeps about the exposed features
list(fe_model.feature_info)

[{'stage': 1, 'reduction': 2, 'module': 'blocks.0', 'num_chs': 16, 'index': 0},
 {'stage': 2, 'reduction': 4, 'module': 'blocks.1', 'num_chs': 24, 'index': 1},
 {'stage': 3, 'reduction': 8, 'module': 'blocks.2', 'num_chs': 40, 'index': 2},
 {'stage': 5,
  'reduction': 16,
  'module': 'blocks.4',
  'num_chs': 112,
  'index': 3},
 {'stage': 7,
  'reduction': 32,
  'module': 'blocks.6',
  'num_chs': 320,
  'index': 4}]


# Run the features-only model and print the shape of every item it returns
out = fe_model(x)
for output in out:
    print(output.shape)

torch.Size([1, 16, 112, 112])
torch.Size([1, 24, 56, 56])
torch.Size([1, 40, 28, 28])
torch.Size([1, 112, 14, 14])
torch.Size([1, 320, 7, 7])


# Inspect the pretrained configuration stored on the model (notebook cell output)
pretrained_model.pretrained_cfg

{'url': 'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_lite0_ra-37913777.pth',
 'hf_hub_id': 'timm/efficientnet_lite0.ra_in1k',
 'architecture': 'efficientnet_lite0',
 'tag': 'ra_in1k',
 'custom_load': False,
 'input_size': (3, 224, 224),
 'fixed_input_size': False,
 'interpolation': 'bicubic',
 'crop_pct': 0.875,
 'crop_mode': 'center',
 'mean': (0.485, 0.456, 0.406),
 'std': (0.229, 0.224, 0.225),
 'num_classes': 1000,
 'pool_size': (7, 7),
 'first_conv': 'conv_stem',
 'classifier': 'classifier'}


from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform

# Resolve the preprocessing settings from the model's pretrained config,
# then build the matching torchvision-style transform from them.
data_config = resolve_data_config(
    pretrained_model.pretrained_cfg, model=pretrained_model
)
transform = create_transform(**data_config)
transform

Compose(
    Resize(size=256, interpolation=bicubic, max_size=None, antialias=warn)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=tensor([0.4850, 0.4560, 0.4060]), std=tensor([0.2290, 0.2240, 0.2250]))
)


from matplotlib import pyplot as plt

# Ten example raw scores (logits)
logits = [0.1, 0.1, 0.4, 5, 0.1, 0.2, 0.1, 0.2, 3, 0.7]
# Bar positions follow the data, so the plot stays valid if the list changes length
class_ids = range(len(logits))

plt.figure(figsize=(6, 3))
plt.bar(class_ids, logits)
plt.xticks(class_ids)
plt.show()


import torch
from torch.nn.functional import softmax

# Convert the raw scores into a probability distribution over the entries
probs = softmax(torch.tensor(logits), dim=0)
# Bar positions follow the data instead of a hard-coded 10
class_ids = range(len(probs))

plt.figure(figsize=(6, 3))
plt.bar(class_ids, probs)
plt.xticks(class_ids)
plt.show()


# 1 at every position whose probability equals the maximum, 0 elsewhere
one_hot = (probs >= probs.max()).int()
print("One hot ", one_hot)

One hot  tensor([0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype=torch.int32)


import numpy as np

# One row of three panels; sharey=False lets each panel keep its own y scale
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 4), sharey=False)


def bar(ax, y, title):
    """Draw `y` as a bar chart on `ax`, titled `title`, with y ticks hidden.

    Args:
        ax: Axes-like object providing `bar`, `set_title` and `set_yticks`.
        y: One-dimensional sequence of bar heights (anything `np.array` accepts).
        title: Title text for the axes.
    """
    heights = np.array(y)
    # One bar per value: positions are derived from the data, not fixed at 10
    ax.bar(range(len(heights)), heights)
    ax.set_title(title)
    # Remove the y-axis ticks
    ax.set_yticks([])


# Side-by-side panels: raw logits, softmax probabilities, one-hot label
bar(ax1, logits, "Logits")
bar(ax2, probs, "Probs")
bar(ax3, one_hot, "Label")

# The argument-less plt.plot() call that used to precede this drew nothing and was dropped
plt.show()


# Notebook shell escape: install the CLIP package straight from its GitHub repository
!pip install -q git+https://github.com/openai/CLIP.git

  Preparing metadata (setup.py) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 53.4/53.4 kB 1.9 MB/s eta 0:00:00
  Building wheel for clip (setup.py) ... done


# Take sample 200 from the dataset and display it
# (labels[200][1] is presumably the class name — confirm against MicroImageNet)
img, class_num = microImgNet[200]
show(img, microImgNet.labels[200][1], 0)


# Candidate text descriptions to compare against the image
texts = [
    "A man with a gasoline saw is getting firewood",
    "Santa Claus sleigh",
    "chain saw",
    "cat",
    "dog",
]


import clip

# List the model names that clip.load() accepts
print(clip.available_models())

['RN50', 'RN101', 'RN50x4', 'RN50x16', 'RN50x64', 'ViT-B/32', 'ViT-B/16', 'ViT-L/14', 'ViT-L/14@336px']


# Load the ViT-B/32 CLIP model on CPU together with its image preprocessing callable
model, preprocess = clip.load("ViT-B/32", device="cpu")

100%|███████████████████████████████████████| 338M/338M [00:03<00:00, 95.9MiB/s]


# Preprocess the image and add a leading batch dimension; tokenize the text descriptions
image = preprocess(img).unsqueeze(0)
text = clip.tokenize(texts)


# Score the image against every text description (inference only, no gradients)
with torch.no_grad():
    logits_per_image, _ = model(image, text)

# Softmax over the text axis turns the scores into per-description probabilities
probs = torch.softmax(logits_per_image, dim=-1).numpy()
print(probs)

plt.figure(figsize=(6, 3))
plt.bar(range(len(texts)), probs.ravel())
plt.show()

[[9.912793e-01 5.492549e-06 8.714993e-03 5.615143e-08 2.927707e-07]]


# Inference only: run both encoders under no_grad() so no autograd graph is
# built (the results previously had to be detached afterwards).
with torch.no_grad():
    image_features = model.encode_image(image).cpu()
    text_features = model.encode_text(text).cpu()
print("Image", image_features.shape)
print("Text", text_features.shape)

Image torch.Size([1, 512])
Text torch.Size([5, 512])


# L2 norm of the first (only) image embedding as returned by the encoder
print(np.linalg.norm(image_features[0]))

10.169521


from torch.nn.functional import normalize

# Rescale every embedding row to unit length (normalize defaults: p=2, dim=1)
image_features = normalize(image_features)
text_features = normalize(text_features)

# Check the norms after normalization: one value for the image, one per text
print(np.linalg.norm(image_features[0].cpu()))
print(np.linalg.norm(text_features.cpu(), axis=1))

0.99999994
[1.        1.0000001 1.        1.        1.0000001]


# Dot product of the image embedding with each text embedding, as plain floats
similarities = [
    torch.dot(image_features[0], text_vector).item()
    for text_vector in text_features
]
print(similarities)

[0.3043335974216461, 0.18330000340938568, 0.2569941282272339, 0.1374690979719162, 0.15398246049880981]


# All text-vs-image dot products in a single matrix multiplication
sims = torch.matmul(text_features, image_features.T)
print(sims.detach().cpu().tolist())

[[0.3043336272239685], [0.18330001831054688], [0.2569941580295563], [0.137469083070755], [0.15398246049880981]]


# Plot the raw similarity values
plt.figure(figsize=(6, 3))
plt.bar(range(len(similarities)), similarities)
plt.show()


# Scale the similarities by 100 before the softmax
# NOTE(review): 100 presumably mirrors the logit scale CLIP applies internally — confirm
chilled_sims = sims.flatten() * 100


# Softmax over the scaled similarities gives one probability per text
s = chilled_sims.softmax(dim=0).numpy()
print(s)
plt.figure(figsize=(8, 6))
plt.bar(range(len(s)), s)
plt.show()

[9.912793e-01 5.492538e-06 8.715026e-03 5.615143e-08 2.927707e-07]


# One prompt per entry of imagenet_labels: take the entry's second field,
# replace underscores with spaces and prefix it with "a photo of ".
descriptions = [
    "a photo of " + entry[1].replace("_", " ")
    for entry in imagenet_labels.values()
]
print(descriptions[:10])

['a photo of tench', 'a photo of goldfish', 'a photo of great white shark', 'a photo of tiger shark', 'a photo of hammerhead', 'a photo of electric ray', 'a photo of stingray', 'a photo of cock', 'a photo of hen', 'a photo of ostrich']


import clip

# First dataset sample, unpacked as (image, label)
img, label = microImgNet[0]

# Load ViT-B/32 again, this time on `device` rather than the hard-coded "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)


# The prompts are identical for every image, so tokenize and encode them once
# instead of re-running the text encoder on each iteration.
text = clip.tokenize(descriptions).to(device)
with torch.no_grad():
    text_features = model.encode_text(text)
    text_features = text_features / text_features.norm(dim=1, keepdim=True)
    logit_scale = model.logit_scale.exp()

for i in range(6):
    # Every 6th sample of the dataset
    img, label = microImgNet[i * 6]

    image = preprocess(img).unsqueeze(0).to(device)

    with torch.no_grad():
        image_features = model.encode_image(image)
        image_features = image_features / image_features.norm(dim=1, keepdim=True)

        # Scaled cosine similarity between the image and every prompt; this is
        # the same computation model(image, text) performs internally.
        logits_per_image = logit_scale * image_features @ text_features.t()
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()

    # Show the image together with the highest-probability prompt
    class_num = probs.argmax()
    descr = descriptions[class_num]
    show(img, descr, i)

Базовые компоненты свёрточных сетей¶

ImageNet¶

Baseline (AlexNet 2012)¶

Метрики ImageNet¶

Тюнинг гиперпараметров (ZFnet)¶

Базовый блок (VGGNet 2014)¶

Вычислительные ресурсы¶

Фильтры 3×3¶

Память для хранения параметров (VGG16)¶

Контроль GPU¶

Inception module (GoogLeNet 2014)¶

1×1 Convolution¶

Stem network¶

Global Average Pooling¶

Затухание градиента¶

Batchnorm (революция глубины)¶

Skip connection (ResNet 2015)¶

Архитектура ResNet¶

BasicBlock в PyTorch¶

Bottleneck layer¶

Обучение ResNet¶

Обогащение карт признаков¶

Grouped Convolution¶

Grouped Convolution in PyTorch¶

ResNeXt¶

Обзор сети MobileNet (2017 г.)¶

Depthwise separable convolution¶

Inverted residual block¶

Сравнение моделей¶

Много skip connection (DenseNet 2016)¶

Ширина вместо глубины (WideResNet 2016)¶

Squeeze-and-Excitation (SENet 2017)¶

Поиск хорошей архитектуры¶

Обзор сети EfficientNet (2019 г.)¶

Трансформеры¶

ConvNext (2022)¶

Ablation study¶

Процесс обучения¶

Torch Image Models (timm)¶

Custom feature extractor¶

Обучение без разметки¶

Дистилляция¶

Hard targets¶

Soft targets¶

CLIP¶

Как использовать¶

Получение embedding¶

Классификация ImageNet¶

Практические рекомендации¶