import pandas as pd

# Fetch the Boston housing table; the first CSV column is used as the row index.
boston_dataset = pd.read_csv(
    "https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/boston_dataset.csv",
    index_col=0,
)
# Labels come from the "target" column (as a numpy array);
# every column except the last one is kept as a feature.
y_data = boston_dataset["target"].values
x_data = boston_dataset.iloc[:, :-1]

boston_dataset.head()


boston_dataset.describe()


from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Learn per-feature scaling statistics, then apply them to the same data.
ss = StandardScaler()
ss.fit(x_data)
x_data_scaled = ss.transform(x_data)

# Linear regression fitted on the standardized features.
model = LinearRegression()
model.fit(x_data_scaled, y_data)

LinearRegression()

LinearRegression()


# One row per feature with its fitted coefficient, largest magnitude first.
coef_by_feature = {"name": boston_dataset.columns[:-1], "coef": model.coef_}
linear_importance = pd.DataFrame(coef_by_feature).sort_values(
    "coef", key=abs, ascending=False
)

linear_importance


import matplotlib.pyplot as plt
import seaborn as sns

# Tag every coefficient by sign so the bars can be colored per sign.
is_negative = linear_importance["coef"] < 0
linear_importance["sign"] = is_negative.map({True: "neg", False: "pos"})
palette = {"neg": "#1e88e5", "pos": "#ff0d57"}

# Horizontal bars: one per feature, bar length is the signed coefficient.
plt.figure(figsize=(5, 5))
plt.title("Linear model coefficients")
sns.barplot(
    data=linear_importance,
    x="coef",
    y="name",
    orient="h",
    hue="sign",
    palette=palette,
    legend=False,
)
plt.show()


linear_importance["abs(coef)"] = linear_importance["coef"].abs()


import numpy as np
from sklearn.ensemble import RandomForestRegressor

# Seeded RandomState instance handed to the forest as its random_state.
rng = np.random.RandomState(42)
# Rebinds `model` (previously the linear regression) to a random forest.
model = RandomForestRegressor(random_state=rng)
# Fitted on the unscaled features, unlike the linear model above.
model.fit(x_data, y_data)

RandomForestRegressor(random_state=RandomState(MT19937) at 0x7C4118575940)

RandomForestRegressor(random_state=RandomState(MT19937) at 0x7C4118575940)


# Pair each feature name with the forest's feature_importances_ value.
gini_importance = pd.DataFrame(
    {
        "name": boston_dataset.columns[:-1],
        "feature importances": model.feature_importances_,
    }
)
# Most important feature first.
gini_importance = gini_importance.sort_values("feature importances", ascending=False)
gini_importance


# Side-by-side bars: |coefficient| of the linear model vs. forest importances.
panels = [
    ("Linear model", linear_importance, "abs(coef)"),
    ("Random forest", gini_importance, "feature importances"),
]

plt.figure(figsize=(13, 4))
for position, (panel_title, frame, value_column) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.title(panel_title)
    sns.barplot(
        data=frame,
        y="name",
        x=value_column,
        color=sns.xkcd_rgb["azure"],
        orient="h",
    )
plt.show()


x_data = boston_dataset.iloc[:, :-1]

# Number of distinct values in every feature column, largest first.
unique_counts = x_data.nunique()
num_unique = pd.DataFrame(
    {"name": unique_counts.index, "num unique": unique_counts.values}
).sort_values("num unique", ascending=False)

plt.figure(figsize=(5, 5))
# This chart shows unique-value counts, not a model output, so it is titled
# accordingly (the previous "Random forest" title was a copy-paste leftover).
plt.title("Number of unique values per feature")
sns.barplot(
    data=num_unique,
    y="name",
    x="num unique",
    color=sns.xkcd_rgb["azure"],
    orient="h",
)
plt.show()


import pandas as pd

# load dataset
# (same load as at the top of the file; presumably repeated so this section
# can be run on its own — TODO confirm)
boston_dataset = pd.read_csv(
    "https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/boston_dataset.csv",
    index_col=0,
)
# features: every column except the last; labels: the "target" column as a numpy array
x_data = boston_dataset.iloc[:, :-1]
y_data = boston_dataset["target"].values

boston_dataset.head()


import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.RandomState(42)
model = RandomForestRegressor(random_state=rng)
model.fit(x_data, y_data)

RandomForestRegressor(random_state=RandomState(MT19937) at 0x7C4118576C40)

RandomForestRegressor(random_state=RandomState(MT19937) at 0x7C4118576C40)


import matplotlib.pyplot as plt
from sklearn.inspection import PartialDependenceDisplay

# One panel per feature, sharing the y axis so the curves are comparable.
_, ax = plt.subplots(ncols=3, figsize=(15, 5), sharey=True, constrained_layout=True)

# Which features to plot and how: kind="both" is passed through to
# PartialDependenceDisplay.from_estimator together with the feature names.
features_info = {
    "features": ["RM", "LSTAT", "AGE"],
    "kind": "both",
    "centered": False,
}

# Settings forwarded unchanged to from_estimator.
common_params = {
    "subsample": 50,
    "n_jobs": 2,
    "grid_resolution": 20,
    "random_state": 0,
}

# x_data is already a DataFrame carrying the feature column names, so it is
# passed as is (re-wrapping it in pd.DataFrame was a no-op).
# The result is bound to `pdp_display` rather than `display`, which would
# shadow IPython's display() helper for the rest of the notebook session.
pdp_display = PartialDependenceDisplay.from_estimator(
    model,
    x_data,
    **features_info,
    ax=ax,
    **common_params,
)


import sklearn
from sklearn.datasets import fetch_20newsgroups

newsgroups_train = fetch_20newsgroups(subset="train")
newsgroups_test = fetch_20newsgroups(subset="test")

# Shorten the class names: keep the last dotted component,
# or the last two components when the name contains "misc".
class_names = []
for full_name in newsgroups_train.target_names:
    parts = full_name.split(".")
    keep = 2 if "misc" in full_name else 1
    class_names.append(".".join(parts[-keep:]))
# Entries 3 and 4 get explicit names (presumably both shorten to the same
# last component otherwise — verify against target_names).
class_names[3] = "pc.hardware"
class_names[4] = "mac.hardware"

for i, class_name in enumerate(class_names):
    print(f"{i:<3d}{class_name}")

0  atheism
1  graphics
2  ms-windows.misc
3  pc.hardware
4  mac.hardware
5  x
6  misc.forsale
7  autos
8  motorcycles
9  baseball
10 hockey
11 crypt
12 electronics
13 med
14 space
15 christian
16 guns
17 mideast
18 politics.misc
19 religion.misc


import sklearn.metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# TfidfVectorizer is imported explicitly: `sklearn.feature_extraction.text` is
# not guaranteed to be loaded by `import sklearn` alone.
# Again, let's use the tfidf vectorizer, commonly used for text.
vectorizer = TfidfVectorizer(lowercase=False)
# Vocabulary and IDF weights are learned on the training texts only.
train_vectors = vectorizer.fit_transform(newsgroups_train.data)
test_vectors = vectorizer.transform(newsgroups_test.data)

# Train the model
model_nb = MultinomialNB(alpha=0.01)
model_nb.fit(train_vectors, newsgroups_train.target)

# Calculate the weighted F1 score on the held-out test split
pred = model_nb.predict(test_vectors)

f1_metric = sklearn.metrics.f1_score(newsgroups_test.target, pred, average="weighted")

print(f"f1-score on test: {f1_metric:.3f}")

f1-score on test: 0.835


from sklearn.pipeline import make_pipeline

# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html
# Chain the already-fitted vectorizer and classifier so that raw strings can be
# passed straight to predict_proba (the callable handed to LIME further down).
model_with_preprocessing = make_pipeline(vectorizer, model_nb)


!pip -q install lime

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 275.7/275.7 kB 6.9 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
  Building wheel for lime (setup.py) ... done


import lime
from lime.lime_text import LimeTextExplainer


# Text explainer labelled with the shortened class names; seeded with random_state=42.
explainer = LimeTextExplainer(class_names=class_names, random_state=42)
# Index of the test document to explain.
idx = 1340
exp = explainer.explain_instance(
    newsgroups_test.data[idx],
    # classifier function: raw text in, class probabilities out
    model_with_preprocessing.predict_proba,
    num_features=10,
    # class indices 0 and 15 are "atheism" and "christian" in class_names
    labels=[0, 15],
)


print(exp.as_list(label=0))
print(exp.as_list(label=15))

[('Caused', 0.25322531210147387), ('Rice', 0.13595123602639667), ('Genocide', 0.11768387005657018), ('scri', -0.09855597458646022), ('certainty', -0.09476293913546043), ('owlnet', -0.0934907172908853), ('Semitic', -0.09131694930901126), ('Theism', 0.0784110699063054), ('justices', 0.05556474240520185), ('Heck', 0.033360524240830865)]
[('Caused', -0.1744987865761024), ('fsu', 0.11120723769711544), ('scri', 0.10146389399943065), ('certainty', 0.09916521316107652), ('owlnet', 0.09808331888483239), ('Genocide', -0.07795538004971421), ('Rice', -0.051919681057782656), ('Hitler', -0.04857271879454341), ('Heck', -0.03965035614391036), ('justices', -0.029307637688004165)]


exp.show_in_notebook(text=newsgroups_test.data[idx], labels=(0,))


exp.show_in_notebook(text=newsgroups_test.data[idx], labels=(15,))


!wget -q 'https://edunet.kea.su/repo/EduNet-web_dependencies/dev-2.0/L14/cat_and_dog1.jpg' -O cat_and_dog1.jpg
!wget -q 'https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/imagenet_class_index.json' -O imagenet_class_index.json


import os
from PIL import Image


def get_image(path):
    """Open the image file at *path* and return it converted to RGB."""
    absolute_path = os.path.abspath(path)
    with open(absolute_path, "rb") as stream, Image.open(stream) as picture:
        return picture.convert("RGB")


img = get_image("cat_and_dog1.jpg")
plt.imshow(img)
plt.axis("off")
plt.show()


from torchvision import transforms


# resize & normalize


def get_input_transform():
    """Build the preprocessing pipeline applied to an image before the model.

    Steps, in order: resize to 224, center-crop to 224x224, convert to a
    tensor, normalize each channel with the fixed mean/std below.
    """
    normalize = transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    )
    steps = [
        transforms.Resize(224),
        transforms.CenterCrop((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]
    return transforms.Compose(steps)


# recover the cropped image from an input tensor (undoes the normalization)


def get_reverse_transform():
    """Build the transform that undoes the input normalization.

    The returned transform expects a normalized batch laid out as
    (N, C, H, W) and returns a numpy array laid out as (N, H, W, C).
    """
    transform = transforms.Compose(
        [
            # Normalize computes (x - mean) / std, so mean=0 with std=1/s
            # multiplies every channel by its original std s ...
            transforms.Normalize(
                mean=(0.0, 0.0, 0.0), std=(1 / 0.229, 1 / 0.224, 1 / 0.225)
            ),
            # ... and mean=-m with std=1 adds the original mean m back.
            transforms.Normalize(
                mean=(-0.485, -0.456, -0.406),
                std=(1.0, 1.0, 1.0),
            ),
            # channels-first -> channels-last, the layout matplotlib expects
            transforms.Lambda(lambda x: torch.permute(x, (0, 2, 3, 1))),
            # .cpu() makes the conversion work for tensors living on the GPU
            # as well; it is a no-op for tensors that are already on the CPU.
            transforms.Lambda(lambda x: x.detach().cpu().numpy()),
        ]
    )
    return transform


def get_input_tensors(img):
    """Preprocess *img* and return it as a batch holding that single image."""
    preprocessed = get_input_transform()(img)
    # unsqueeze(0) adds the leading batch dimension (batch of 1)
    return preprocessed.unsqueeze(0)


def get_crop_img(img_tensor):
    """Undo the normalization of a batch and return its first image."""
    restored_batch = get_reverse_transform()(img_tensor)
    return restored_batch[0]


import json
import torch
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet18(weights="ResNet18_Weights.DEFAULT")

idx2label, cls2label, cls2idx = [], {}, {}
# The class-index file is downloaded into the working directory (see the wget
# call that writes imagenet_class_index.json), so it is opened relative to the
# working directory instead of through a hard-coded "/content/" path.
with open(os.path.abspath("imagenet_class_index.json"), "r") as read_file:
    class_idx = json.load(read_file)
# class_idx is keyed by str(index); element [1] of each entry is the label.
idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))]
# NOTE: the name `lable2idx` (sic) is kept as is so existing references keep working.
lable2idx = {class_idx[str(k)][1]: k for k in range(len(class_idx))}

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 111MB/s]


import torch.nn.functional as F

img_t = get_input_tensors(img)
model = model.to(device)
model.eval()
logits = model(img_t.to(device))

# Class probabilities and the five largest of them.
probs = F.softmax(logits, dim=1)
probs5 = probs.topk(5)

# Show the image exactly as the model receives it (resized and cropped).
plt.imshow(get_crop_img(img_t))
plt.axis("off")
plt.show()

# (probability, class index, label) for each of the five top classes.
top_probs = probs5[0][0].detach().cpu().numpy()
top_classes = probs5[1][0].detach().cpu().numpy()
tuple((p, c, idx2label[c]) for p, c in zip(top_probs, top_classes))

((0.4030773, 235, 'German_shepherd'),
 (0.095317766, 281, 'tabby'),
 (0.06907895, 282, 'tiger_cat'),
 (0.026926216, 285, 'Egyptian_cat'),
 (0.02285185, 811, 'space_heater'))


!pip install -q lime


def batch_predict(images):  # images are numpy arrays
    """Return class probabilities for a batch of images.

    Args:
        images: iterable of numpy image arrays accepted by Image.fromarray.

    Returns:
        numpy array with one row of softmax probabilities per input image.
    """
    model.eval()
    model.to(device)

    transform = get_input_transform()
    batch = torch.stack([transform(Image.fromarray(i)) for i in images], dim=0)
    batch = batch.to(device)

    # Pure inference: no_grad avoids building an autograd graph for every
    # batch of perturbed images the explainer sends through this function.
    with torch.no_grad():
        logits = model(batch)
        probs = F.softmax(logits, dim=1)
    return probs.cpu().numpy()


import lime
from lime import lime_image

img_t = get_input_tensors(img)

# LIME expects a uint8 numpy image. After undoing the normalization, float
# values can land slightly outside [0, 1]; round to the nearest integer and
# clip to [0, 255] so the uint8 cast cannot wrap around or truncate.
lime_input = np.clip(np.rint(255 * get_crop_img(img_t)), 0, 255).astype(np.uint8)

explainer = lime_image.LimeImageExplainer(random_state=42)
explanation = explainer.explain_instance(
    lime_input,
    batch_predict,  # classification function
    top_labels=5,
    hide_color=0,
    num_samples=1000,  # number of images that will be sent to classification function
    random_seed=42,
)

  0%|          | 0/1000 [00:00<?, ?it/s]


# Rank and label of every class kept by the explanation.
# The loop variable is `label_id` so the builtin id() is not shadowed.
for i, label_id in enumerate(explanation.top_labels):
    print(i, idx2label[label_id])

0 German_shepherd
1 tabby
2 tiger_cat
3 Egyptian_cat
4 space_heater


from skimage.segmentation import mark_boundaries

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 10))

# One panel for each of the two top labels.
# number of clusters to be shown in the image: num_features=5
# show or not negatively impacting clusters: positive_only=False
# first 5 may be only positive
# The loop variable is `label_id` so the builtin id() is not shadowed.
for i, label_id in enumerate(explanation.top_labels[:2]):
    temp, mask = explanation.get_image_and_mask(
        label_id, positive_only=False, num_features=5, hide_rest=False
    )
    img_boundary = mark_boundaries(temp, mask)
    ax[i].imshow(img_boundary)
    ax[i].set_title(idx2label[label_id])
    ax[i].axis("off")


!pip install -q shap

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 538.2/538.2 kB 9.2 MB/s eta 0:00:00


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor


# load dataset
boston_dataset = pd.read_csv(
    "https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/boston_dataset.csv",
    index_col=0,
)
# features: every column except the last; labels: the "target" column
# (kept as a pandas Series here, not converted to a numpy array)
x_data = boston_dataset.iloc[:, :-1]
y_data = boston_dataset["target"]

# Split the data into train and test data (20% held out, fixed seed)
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=0.2, random_state=42
)

# Build the model with the random forest regression algorithm
# (trees limited to depth 4; fitted on the training split only)
rng = np.random.RandomState(42)
model = RandomForestRegressor(n_jobs=-1, max_depth=4, random_state=rng)
model.fit(x_train, y_train)

RandomForestRegressor(max_depth=4, n_jobs=-1,
                      random_state=RandomState(MT19937) at 0x7C4117183A40)

RandomForestRegressor(max_depth=4, n_jobs=-1,
                      random_state=RandomState(MT19937) at 0x7C4117183A40)


import shap

# explain the model's predictions using SHAP
# (same syntax works for LightGBM, CatBoost, scikit-learn and spark models)
explainer = shap.TreeExplainer(model)
explanations = explainer(x_test)


print(type(explanations))

print(explanations)

<class 'shap._explanation.Explanation'>
.values =
array([[ 3.59513250e-01, -7.57494185e-04, -5.36065189e-03, ...,
         3.75769950e-02,  1.09907495e-02,  1.56735456e+00],
       [ 3.23542906e-01, -6.30312450e-02, -3.63459255e-02, ...,
         9.69022019e-03, -1.04828771e-03,  5.44875225e+00],
       [ 1.05924802e+00, -2.12919386e-03, -1.16701150e-02, ...,
        -2.49177497e-01,  5.58777181e-02, -4.13010066e+00],
       ...,
       [-9.90074478e-01, -2.18585538e-03, -1.43130276e-02, ...,
        -2.21104649e-01, -1.84161230e-01, -8.30349060e+00],
       [ 3.71187999e-01, -2.37094303e-03,  1.39199662e-02, ...,
        -3.90942703e-02,  1.69203421e-02,  7.06083619e-01],
       [ 3.61234522e-01, -2.37094303e-03, -3.34938142e-02, ...,
        -4.71612736e-02,  3.60390550e-02,  1.83550067e+00]])

.base_values =
array([22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698, 22.8867698, 22.8867698, 22.8867698,
       22.8867698, 22.8867698])

.data =
array([[9.17800e-02, 0.00000e+00, 4.05000e+00, ..., 1.66000e+01,
        3.95500e+02, 9.04000e+00],
       [5.64400e-02, 4.00000e+01, 6.41000e+00, ..., 1.76000e+01,
        3.96900e+02, 3.53000e+00],
       [1.05740e-01, 0.00000e+00, 2.77400e+01, ..., 2.01000e+01,
        3.90110e+02, 1.80700e+01],
       ...,
       [1.40507e+01, 0.00000e+00, 1.81000e+01, ..., 2.02000e+01,
        3.50500e+01, 2.12200e+01],
       [5.18800e-02, 0.00000e+00, 4.49000e+00, ..., 1.85000e+01,
        3.95990e+02, 1.28600e+01],
       [9.51200e-02, 0.00000e+00, 1.28300e+01, ..., 1.87000e+01,
        3.83230e+02, 8.94000e+00]])


# load JS visualization code to notebook
shap.initjs()
# visualize the first prediction’s explanation
shap.plots.force(explanations[0])


# visualize the first prediction's explanation using waterfall

shap.plots.waterfall(explanations[0])


# load JS visualization code to notebook
shap.initjs()
# visualize the explanations of all test-set predictions
# (`explanations` was computed on x_test)
shap.force_plot(explanations)


shap.plots.bar(explanations)


!pip install -q transformers[sentencepiece]


import torch
import transformers
from transformers import AutoModelForSeq2SeqLM
from IPython.display import clear_output

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Source and target language codes; they select the checkpoint name below.
lang = "en"
target_lang = "ru"
model_name = f"Helsinki-NLP/opus-mt-{lang}-{target_lang}"

# Download the model (only the model is loaded in this cell)
# can also try translation with different pre-trained models

# It's a Factory pattern: the Auto* class returns a concrete model class
# chosen from the checkpoint, which is why the type is printed below.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
model.to(device)
# Clear the cell output before printing the type.
clear_output()
print(type(model))

<class 'transformers.models.marian.modeling_marian.MarianMTModel'>


!pip install -q sacremoses

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 897.5/897.5 kB 10.9 MB/s eta 0:00:00


from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)

clear_output()

print(type(tokenizer))

# Bound to `model_inputs` rather than `input`, so the builtin input() is not
# shadowed for the rest of the session.
model_inputs = tokenizer("Hello world!", return_tensors="pt")
print(model_inputs)

# Reuse the encoding printed above instead of tokenizing the sentence again;
# it is moved to the model's device only after printing.
translated = model.generate(**model_inputs.to(device), max_new_tokens=512)
# ** -  is dictionary unpack operator
# https://pavel-karateev.gitbook.io/intermediate-python/sintaksis/args_and_kwargs

<class 'transformers.models.marian.tokenization_marian.MarianTokenizer'>
{'input_ids': tensor([[ 160, 5270,  564,   56,    0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}


import shap

# define the input sentences we want to translate
data = [
    # The two adjacent literals are joined by Python into ONE input string.
    # The trailing space on the first literal separates the sentences;
    # without it the text reads "translation.That".
    "Transformers are a type of neural network architecture that have been gaining popularity. Transformers were developed to solve the problem of sequence transduction, or neural machine translation. "
    "That means any task that transforms an input sequence to an output sequence. This includes speech recognition, text-to-speech transformation, etc.."
]

# we build an explainer by passing the model we want to explain and
# the tokenizer we want to use to break up the input strings
explainer = shap.Explainer(model, tokenizer, max_new_tokens=512)

# explainers are callable, just like models
explanation = explainer(data)
clear_output()


print("Data", explanation.data)
print("SHAP values", explanation.values)
print("Shape", explanation.shape)  # 1, in, out

Data (array(['▁Trans', 'former', 's', '▁are', '▁a', '▁type', '▁of', '▁ne',
       'ural', '▁network', '▁architecture', '▁that', '▁have', '▁been',
       '▁gaining', '▁popularity', '.', '▁Trans', 'former', 's', '▁were',
       '▁developed', '▁to', '▁solve', '▁the', '▁problem', '▁of',
       '▁sequence', '▁trans', 'duct', 'ion', ',', '▁or', '▁ne', 'ural',
       '▁machine', '▁translation', '.', 'That', '▁means', '▁any', '▁task',
       '▁that', '▁transform', 's', '▁an', '▁input', '▁sequence', '▁to',
       '▁an', '▁output', '▁sequence', '.', '▁This', '▁includes',
       '▁speech', '▁recognition', ',', '▁text', '-', 'to', '-', 's', 'pe',
       'ech', '▁transformation', ',', '▁etc', '.', '.', ''], dtype=object),)
SHAP values [[[ 1.39694700e+00  5.75616847e+00 -5.07417767e-01 ... -2.29897152e-02
    2.00847441e-02 -9.21574658e-03]
  [ 8.24752197e-01  2.37404535e+00  2.44451605e+00 ... -3.03489130e-03
    3.48355277e-03  1.34331505e-03]
  [ 6.21933990e-01  3.28069819e-01  2.84975874e-01 ... -6.09600328e-04
   -1.29937957e-03  8.10789659e-04]
  ...
  [-2.12584656e-02  6.46664753e-02  7.64958889e-03 ...  2.91938526e-04
    4.99640948e-02  2.33951153e-02]
  [ 4.59862546e-03 -5.98606269e-01  1.26282991e-01 ...  5.25125547e-02
    3.01344616e-02  2.71421814e-02]
  [-2.75666469e-02 -1.07679722e-01  1.55281482e-02 ... -8.65370207e-03
   -2.89849946e-02  6.64150443e-03]]]
Shape (1, 71, 79)


shap.initjs()
shap.plots.text(explanation)


!pip install -q datasets

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 542.0/542.0 kB 6.5 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 116.3/116.3 kB 7.5 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 194.1/194.1 kB 9.6 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.8/134.8 kB 8.4 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 388.9/388.9 kB 12.1 MB/s eta 0:00:00


import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-xsum-12-6")
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-xsum-12-6").to(
    device
)


dataset = load_dataset("xsum", split="train", trust_remote_code=True)  # load dataset
# Slice the rows first and then pick the column: this reads only the first
# record instead of materializing the whole "document" column to take one item.
s = dataset[0:1]["document"]  # inputs to run model inference on
explainer = shap.Explainer(model, tokenizer)  # create an explainer object
explanation = explainer(s)  # Compute shap values
clear_output()


shap.initjs()
shap.plots.text(explanation)  # Visualize shap explanations


!wget -q 'https://edunet.kea.su/repo/EduNet-web_dependencies/dev-2.0/L14/cat_and_dog1.jpg' -O cat_and_dog1.jpg
!wget -q 'https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/imagenet_class_index.json' -O imagenet_class_index.json


import os
import matplotlib.pyplot as plt
from PIL import Image


def get_image(path):
    """Read the image stored at *path* and return an RGB copy of it."""
    location = os.path.abspath(path)
    with open(location, "rb") as handle, Image.open(handle) as source:
        rgb_image = source.convert("RGB")
    return rgb_image


img = get_image("cat_and_dog1.jpg")
plt.rcParams["figure.figsize"] = (5, 5)
plt.imshow(img)
plt.axis("off")
plt.show()


from torchvision import transforms


# resize & normalize


def get_input_transform():
    """Return the image preprocessing pipeline used before the model.

    Resize to 224, center-crop to 224x224, convert to tensor and normalize
    each channel with the fixed mean/std given below.
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)
    return transforms.Compose(
        [
            transforms.Resize(224),
            transforms.CenterCrop((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=channel_mean, std=channel_std),
        ]
    )


# recover the cropped image from an input tensor (undoes the normalization)


def get_reverse_transform():
    """Build the transform that undoes the input normalization.

    The returned transform expects a normalized batch laid out as
    (N, C, H, W) and returns a numpy array laid out as (N, H, W, C).
    """
    transform = transforms.Compose(
        [
            # Normalize computes (x - mean) / std, so mean=0 with std=1/s
            # multiplies every channel by its original std s ...
            transforms.Normalize(
                mean=(0.0, 0.0, 0.0), std=(1 / 0.229, 1 / 0.224, 1 / 0.225)
            ),
            # ... and mean=-m with std=1 adds the original mean m back.
            transforms.Normalize(
                mean=(-0.485, -0.456, -0.406),
                std=(1.0, 1.0, 1.0),
            ),
            # channels-first -> channels-last, the layout matplotlib expects
            transforms.Lambda(lambda x: torch.permute(x, (0, 2, 3, 1))),
            # .cpu() makes the conversion work for tensors living on the GPU
            # as well; it is a no-op for tensors that are already on the CPU.
            transforms.Lambda(lambda x: x.detach().cpu().numpy()),
        ]
    )
    return transform


def get_input_tensors(img):
    """Run *img* through the input transform and wrap it into a batch of one."""
    as_tensor = get_input_transform()(img)
    # unsqueeze(0) converts the single image into a batch of 1
    batch_of_one = as_tensor.unsqueeze(0)
    return batch_of_one


def get_crop_img(img_tensor):
    """Return the first image of a normalized batch with the normalization undone."""
    denormalize = get_reverse_transform()
    images = denormalize(img_tensor)
    return images[0]


import json
import torch
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet18(weights="ResNet18_Weights.DEFAULT")

idx2label, cls2label, cls2idx = [], {}, {}
# The class-index file is downloaded into the working directory (see the wget
# call that writes imagenet_class_index.json), so it is opened relative to the
# working directory instead of through a hard-coded "/content/" path.
with open(os.path.abspath("imagenet_class_index.json"), "r") as read_file:
    class_idx = json.load(read_file)
# class_idx is keyed by str(index); element [1] of each entry is the label.
idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))]
# NOTE: the name `lable2idx` (sic) is kept as is so existing references keep working.
lable2idx = {class_idx[str(k)][1]: k for k in range(len(class_idx))}


print(type(img))
img_t = get_input_tensors(img)

model.to(device)
model.eval()
logits = model(img_t.to(device))

<class 'PIL.Image.Image'>


import torch.nn.functional as F


def top_k_class(logits, k=6):
    """Print the k most probable classes for the first item of the batch.

    Each line shows the label, the class index and the probability in percent.
    """
    probabilities = F.softmax(logits, dim=1)
    best_probs, best_ids = probabilities.topk(k)

    for prob, class_index in zip(best_probs[0], best_ids[0]):
        index = class_index.item()
        print(f"{idx2label[index]} {index}: {100 * prob.item():.1f}%")


top_k_class(logits)

German_shepherd 235: 40.3%
tabby 281: 9.5%
tiger_cat 282: 6.9%
Egyptian_cat 285: 2.7%
space_heater 811: 2.3%
malinois 225: 1.2%


img_t.requires_grad = True  # Tell pytorch to compute grads w.r.t. inputs too

logits = model(img_t.to(device))  # [1,1000] batch of one element, 1000 class scores
top_score, top_idx = logits[0].topk(1)  # Get id of class with best score
# Stored as `class_id` so the builtin id() is not shadowed.
class_id = top_idx[0].item()

print(class_id, idx2label[class_id])  # Print the label of this class

score = logits[:, class_id]  # Model output for particular class

235 German_shepherd


# Compute gradients
score.backward(retain_graph=True)

# retain_graph=True is not necessary for a single backward pass.
# But if this cell is run a second time without it, PyTorch raises an error,
# because the graph's intermediate buffers are freed after the first backward().

print(img_t.grad.shape)

torch.Size([1, 3, 224, 224])


import numpy as np
from matplotlib import pylab as P


# Helper method to display grad
def grad_to_image(raw_grads, percentile=99):
    """Turn a batch of gradients into a 2-D saliency map scaled to [0, 1].

    Args:
        raw_grads: tensor laid out as (N, C, H, W); only the first item of
            the batch is used.
        percentile: percentile of the map used as the upper bound of the
            scaling; larger values are clipped to 1.

    Returns:
        numpy array of shape (H, W) with values in [0, 1]. A constant map
        (upper bound equal to the minimum) yields all zeros.
    """
    gradients = raw_grads.detach().cpu().numpy()
    # channels-first -> channels-last, then take the first batch item
    gradients = np.transpose(gradients, (0, 2, 3, 1))[0]

    # Collapse the channels: sum of absolute gradients per pixel.
    image_2d = np.sum(np.abs(gradients), axis=2)

    vmax = np.percentile(image_2d, percentile)
    vmin = np.min(image_2d)

    span = vmax - vmin
    if span <= 0:
        # Nothing to scale; avoid the 0/0 division that would produce NaNs.
        return np.zeros_like(image_2d)

    return np.clip((image_2d - vmin) / span, 0, 1)


def plot_saliency_map(img_tensor, saliency_map):
    """Show the de-normalized input image next to its saliency map.

    Args:
        img_tensor: normalized image batch; its first image is displayed.
        saliency_map: 2-D array drawn in grayscale with values mapped from [0, 1].

    Note: sets the global default figure size via plt.rcParams.
    """
    plt.rcParams["figure.figsize"] = (10, 5)
    plt.subplot(1, 2, 1)
    # Use the tensor passed by the caller (the global img_t was read here before).
    img = get_crop_img(img_tensor)
    plt.imshow(img)
    plt.axis("off")
    plt.subplot(1, 2, 2)
    plt.imshow(saliency_map, cmap=P.cm.gray, vmin=0, vmax=1)
    plt.axis("off")
    plt.show()


saliency_map = grad_to_image(img_t.grad)
plot_saliency_map(img_t, saliency_map)


!pip install -q saliency

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 86.2/86.2 kB 2.4 MB/s eta 0:00:00


import numpy as np
import saliency.core as saliency


model = models.resnet18(weights="ResNet18_Weights.DEFAULT")


def call_model_function(img, call_model_args=None, expected_keys=None):
    """Gradient callback passed to the saliency library.

    Args:
        img: numpy batch laid out as (N, H, W, C), not yet normalized.
        call_model_args: accepted for the callback signature; not used.
        expected_keys: accepted for the callback signature; not used.

    Returns:
        dict mapping saliency.base.INPUT_OUTPUT_GRADIENTS to the gradients of
        the target class score w.r.t. the normalized input, laid out as
        (N, H, W, C). The target class is the top class of the FIRST image.
    """
    normalize = transforms.Normalize(
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
    )
    # channels-last numpy batch -> channels-first tensor, then normalize
    batch = normalize(torch.tensor(np.transpose(img, (0, 3, 1, 2))))
    batch.requires_grad_(True)

    model.to(device)
    model.eval()
    logits = model(batch.float().to(device))

    # Class with the best score for the first image of the batch.
    _, best = logits[0].topk(1)
    target_class_idx = best[0].item()

    output = logits[:, target_class_idx]
    (input_grads,) = torch.autograd.grad(
        output, batch, grad_outputs=torch.ones_like(output)
    )
    # back to channels-last for the saliency library
    gradients = torch.movedim(input_grads, 1, 3).detach().numpy()
    return {saliency.base.INPUT_OUTPUT_GRADIENTS: gradients}


# Reload the image and build both forms: the normalized batch (img_t) and the
# de-normalized first image of that batch (img_arr), which is what the
# saliency library receives.
img = get_image("cat_and_dog1.jpg")
img_t = get_input_tensors(img)
img_arr = get_crop_img(img_t)

gradient_saliency = saliency.GradientSaliency()

# Both masks are computed for the same image using call_model_function
# as the gradient callback.
vanilla_mask_3d = gradient_saliency.GetMask(img_arr, call_model_function)
smoothgrad_mask_3d = gradient_saliency.GetSmoothedMask(img_arr, call_model_function)

# Call the visualization methods to convert the 3D tensors to 2D grayscale.
vanilla_mask_grayscale = saliency.VisualizeImageGrayscale(vanilla_mask_3d)
smoothgrad_mask_grayscale = saliency.VisualizeImageGrayscale(smoothgrad_mask_3d)


from matplotlib import pylab as P


def ShowGrayscaleImage(im, title="", ax=None):
    """Draw *im* in grayscale with the color range fixed to [0, 1].

    Args:
        im: 2-D image to display.
        title: text shown above the image.
        ax: Matplotlib axes to draw on. When None, a new figure is created
            and its axes are used.
    """
    if ax is None:
        P.figure()
        ax = P.gca()
    # Draw on the axes that was passed in, not on whichever axes happens to
    # be current at call time.
    ax.axis("off")
    ax.imshow(im, cmap=P.cm.gray, vmin=0, vmax=1)
    ax.set_title(title)


# Set up matplot lib figures.
plt.rcParams["figure.figsize"] = (15, 5)

plt.subplot(1, 3, 1)
plt.imshow(img_arr)
plt.axis("off")

ShowGrayscaleImage(
    vanilla_mask_grayscale, title="Vanilla Gradient", ax=P.subplot(1, 3, 2)
)
ShowGrayscaleImage(smoothgrad_mask_grayscale, title="SmoothGrad", ax=P.subplot(1, 3, 3))


! pip install -q captum

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.3/1.3 MB 13.2 MB/s eta 0:00:00


# Class probabilities for the image, and the single most probable class.
output = F.softmax(model(img_t.to(device)), dim=1)
prediction_score, pred_label_idx = torch.topk(output, 1)

# Drop the singleton dimensions in place so .item() can be used below.
pred_label_idx.squeeze_()
predicted_label = idx2label[pred_label_idx.item()]
print("Predicted:", predicted_label, "(", prediction_score.squeeze().item(), ")")

Predicted: German_shepherd ( 0.40307730436325073 )


from captum.attr import IntegratedGradients


integrated_gradients = IntegratedGradients(model)
attributions_ig = integrated_gradients.attribute(
    img_t.to(device), target=pred_label_idx, n_steps=200
)


saliency_map = grad_to_image(attributions_ig)

# Set up matplot lib figures.
plt.rcParams["figure.figsize"] = (20, 5)

plt.subplot(1, 4, 1)
plt.imshow(img_arr)
plt.axis("off")

ShowGrayscaleImage(
    vanilla_mask_grayscale, title="Vanilla Gradient", ax=P.subplot(1, 4, 2)
)
ShowGrayscaleImage(smoothgrad_mask_grayscale, title="SmoothGrad", ax=P.subplot(1, 4, 3))
ShowGrayscaleImage(saliency_map, title="Integrated Gradients", ax=P.subplot(1, 4, 4))


from torchsummary import summary

model = models.resnet18(weights="ResNet18_Weights.DEFAULT")

summary(model.to("cpu"), (3, 224, 224), device="cpu")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64, 56, 56]               0
           Conv2d-15           [-1, 64, 56, 56]          36,864
      BatchNorm2d-16           [-1, 64, 56, 56]             128
             ReLU-17           [-1, 64, 56, 56]               0
       BasicBlock-18           [-1, 64, 56, 56]               0
           Conv2d-19          [-1, 128, 28, 28]          73,728
      BatchNorm2d-20          [-1, 128, 28, 28]             256
             ReLU-21          [-1, 128, 28, 28]               0
           Conv2d-22          [-1, 128, 28, 28]         147,456
      BatchNorm2d-23          [-1, 128, 28, 28]             256
           Conv2d-24          [-1, 128, 28, 28]           8,192
      BatchNorm2d-25          [-1, 128, 28, 28]             256
             ReLU-26          [-1, 128, 28, 28]               0
       BasicBlock-27          [-1, 128, 28, 28]               0
           Conv2d-28          [-1, 128, 28, 28]         147,456
      BatchNorm2d-29          [-1, 128, 28, 28]             256
             ReLU-30          [-1, 128, 28, 28]               0
           Conv2d-31          [-1, 128, 28, 28]         147,456
      BatchNorm2d-32          [-1, 128, 28, 28]             256
             ReLU-33          [-1, 128, 28, 28]               0
       BasicBlock-34          [-1, 128, 28, 28]               0
           Conv2d-35          [-1, 256, 14, 14]         294,912
      BatchNorm2d-36          [-1, 256, 14, 14]             512
             ReLU-37          [-1, 256, 14, 14]               0
           Conv2d-38          [-1, 256, 14, 14]         589,824
      BatchNorm2d-39          [-1, 256, 14, 14]             512
           Conv2d-40          [-1, 256, 14, 14]          32,768
      BatchNorm2d-41          [-1, 256, 14, 14]             512
             ReLU-42          [-1, 256, 14, 14]               0
       BasicBlock-43          [-1, 256, 14, 14]               0
           Conv2d-44          [-1, 256, 14, 14]         589,824
      BatchNorm2d-45          [-1, 256, 14, 14]             512
             ReLU-46          [-1, 256, 14, 14]               0
           Conv2d-47          [-1, 256, 14, 14]         589,824
      BatchNorm2d-48          [-1, 256, 14, 14]             512
             ReLU-49          [-1, 256, 14, 14]               0
       BasicBlock-50          [-1, 256, 14, 14]               0
           Conv2d-51            [-1, 512, 7, 7]       1,179,648
      BatchNorm2d-52            [-1, 512, 7, 7]           1,024
             ReLU-53            [-1, 512, 7, 7]               0
           Conv2d-54            [-1, 512, 7, 7]       2,359,296
      BatchNorm2d-55            [-1, 512, 7, 7]           1,024
           Conv2d-56            [-1, 512, 7, 7]         131,072
      BatchNorm2d-57            [-1, 512, 7, 7]           1,024
             ReLU-58            [-1, 512, 7, 7]               0
       BasicBlock-59            [-1, 512, 7, 7]               0
           Conv2d-60            [-1, 512, 7, 7]       2,359,296
      BatchNorm2d-61            [-1, 512, 7, 7]           1,024
             ReLU-62            [-1, 512, 7, 7]               0
           Conv2d-63            [-1, 512, 7, 7]       2,359,296
      BatchNorm2d-64            [-1, 512, 7, 7]           1,024
             ReLU-65            [-1, 512, 7, 7]               0
       BasicBlock-66            [-1, 512, 7, 7]               0
AdaptiveAvgPool2d-67            [-1, 512, 1, 1]               0
           Linear-68                 [-1, 1000]         513,000
================================================================
Total params: 11,689,512
Trainable params: 11,689,512
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 62.79
Params size (MB): 44.59
Estimated Total Size (MB): 107.96
----------------------------------------------------------------


from collections import defaultdict


def get_forward_hook(history_dict, key):
    """Build a forward hook that stores a module's output in ``history_dict``.

    Args:
        history_dict: Mutable mapping that receives the captured output.
        key: Key under which the output is stored; overwritten on every call.

    Returns:
        A callable with the ``(module, input, output)`` forward-hook signature.
    """

    def _store_output(module, inputs, output):
        # Detach and copy so the stored tensor is separate from the live output
        snapshot = output.detach().clone()
        history_dict[key] = snapshot

    return _store_output


def register_model_hooks(model):
    """Attach a forward hook to ``model``'s ``layer4`` and return its storage.

    Args:
        model: Module exposing a ``layer4`` submodule.

    Returns:
        A ``defaultdict`` whose ``"feature_map"`` entry is filled with the
        ``layer4`` output each time the model runs forward.
    """
    captured = defaultdict(list)
    target_layer = model._modules["layer4"]
    target_layer.register_forward_hook(get_forward_hook(captured, "feature_map"))
    return captured


# Convert the image to the model's input tensor (helper defined elsewhere in the file)
img_t = get_input_tensors(img)


# Switch to evaluation mode and attach the hook before running the forward pass,
# so the layer4 output is captured during this call
model.eval()
history = register_model_hooks(model)
output = model(img_t)

print(history["feature_map"].shape)

torch.Size([1, 512, 7, 7])


# Overlay the first six captured layer4 channels on the image in a 2x3 grid
plt.figure(figsize=(15, 10))

for i in range(6):
    channel_map = history["feature_map"][0][i]

    plt.subplot(2, 3, i + 1)
    plt.imshow(img_arr)
    # extent stretches the small map over the 0..224 coordinate range
    plt.imshow(
        channel_map,
        cmap="jet",
        alpha=0.6,
        interpolation="nearest",
        extent=(0, 224, 224, 0),
    )
    plt.axis("off")

plt.show()


import torch.nn.functional as F

number_of_top_classes = 6

# Class probabilities, then the highest ones together with their class indices
prediction = F.softmax(output, dim=1)
top_props, top_inds = prediction.topk(number_of_top_classes)


# Walk the first (only) batch row as plain Python numbers
top_pairs = zip(top_inds[0].tolist(), top_props[0].tolist())
for i, (class_idx, score) in enumerate(top_pairs):
    category_name = idx2label[class_idx]
    print(f"{category_name} {class_idx}: {100 * score:.1f}%")

German_shepherd 235: 40.0%
tabby 281: 7.0%
tiger_cat 282: 4.0%
space_heater 811: 3.0%
Egyptian_cat 285: 2.8%
malinois 225: 1.2%


def get_backward_hook(history_dict, key):
    """Build a backward hook that stores spatially averaged output gradients.

    Args:
        history_dict: Mutable mapping that receives the averaged gradient.
        key: Key under which the result is stored; overwritten on every call.

    Returns:
        A callable with the ``(module, grad_input, grad_output)`` signature.
    """

    def _store_mean_grad(module, grad_input_, grad_output):
        # Only the first output gradient is used; take a detached copy of it
        grads = grad_output[0].detach().clone()
        # Average over dims 2 and 3, keeping them as size-1 axes
        history_dict[key] = grads.mean(dim=[2, 3], keepdim=True)

    return _store_mean_grad


def register_model_hooks(model):
    """Attach forward and full-backward hooks to ``model``'s ``layer4``.

    Args:
        model: Module exposing a ``layer4`` submodule.

    Returns:
        A ``defaultdict`` that receives ``"feature_map"`` on each forward
        pass and ``"weight"`` on each backward pass through ``layer4``.
    """
    captured = defaultdict(list)
    target_layer = model._modules["layer4"]

    target_layer.register_forward_hook(get_forward_hook(captured, "feature_map"))
    target_layer.register_full_backward_hook(get_backward_hook(captured, "weight"))
    return captured


def get_cam_map(model, img, class_num):
    """Compute a Grad-CAM style map for one class from ``model``'s ``layer4``.

    The ``layer4`` output and the spatially averaged gradient of the class
    score with respect to it are captured with temporary hooks. The hooks are
    removed before returning, so repeated calls do not pile up hooks on the
    shared model.

    Args:
        model: Module exposing a ``layer4`` submodule.
        img: Input batch; only the first sample is used for the map.
        class_num: Index of the output logit to explain.

    Returns:
        A detached CPU tensor: ReLU of the gradient-weighted sum of the
        ``layer4`` channels of the first sample.
    """
    history = {}
    target_layer = model._modules["layer4"]
    handles = [
        target_layer.register_forward_hook(
            get_forward_hook(history, "feature_map")
        ),
        target_layer.register_full_backward_hook(
            get_backward_hook(history, "weight")
        ),
    ]

    try:
        output = model.eval()(img)
        activation = history["feature_map"]

        # Backpropagate only the selected class score of the first sample
        output[0, class_num].backward()
        weight = history["weight"]
    finally:
        # Always detach the temporary hooks, even if forward/backward failed
        for handle in handles:
            handle.remove()

    # Weight each channel by its averaged gradient, sum over channels, and
    # keep only the positive part
    cam_map = F.relu((weight[0] * activation[0]).sum(0)).detach().cpu()
    return cam_map


# Class activation maps for the six top classes, shown without smoothing
plt.figure(figsize=(15, 10))

for i in range(6):
    class_idx = top_inds[0][i]
    cam_map = get_cam_map(model, img_t, class_idx)

    plt.subplot(2, 3, i + 1)
    plt.imshow(img_arr)
    # extent stretches the small map over the 0..224 coordinate range
    plt.imshow(
        cam_map,
        cmap="jet",
        alpha=0.6,
        interpolation="nearest",
        extent=(0, 224, 224, 0),
    )
    plt.title(idx2label[class_idx.item()])
    plt.axis("off")

plt.show()


# Same class activation maps, drawn with bilinear interpolation
plt.figure(figsize=(15, 10))

for i in range(6):
    class_idx = top_inds[0][i]
    cam_map = get_cam_map(model, img_t, class_idx)

    plt.subplot(2, 3, i + 1)
    plt.imshow(img_arr)
    # extent stretches the small map over the 0..224 coordinate range
    plt.imshow(
        cam_map,
        cmap="jet",
        alpha=0.6,
        interpolation="bilinear",
        extent=(0, 224, 224, 0),
    )
    plt.title(idx2label[class_idx.item()])
    plt.axis("off")

plt.show()


!pip install -q grad-cam

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.8/7.8 MB 25.6 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
  Building wheel for grad-cam (pyproject.toml) ... done


from pytorch_grad_cam import GradCAM

# Layer whose activations and gradients the library implementation will use
target_layers = [model._modules["layer4"]]

# GradCAM object from pytorch_grad_cam, bound to this model and target layer
cam = GradCAM(model=model, target_layers=target_layers)


from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# Library Grad-CAM maps for the six top classes
plt.figure(figsize=(15, 10))

for i in range(6):
    class_idx = top_inds[0][i]
    target = [ClassifierOutputTarget(class_idx)]
    cam_map = cam(targets=target, input_tensor=img_t)
    # The call returns one map per batch item; take the first
    heatmap = cam_map[0]

    plt.subplot(2, 3, i + 1)
    plt.imshow(img_arr)
    plt.imshow(heatmap, cmap="jet", alpha=0.6, interpolation="bilinear")
    plt.title(idx2label[class_idx.item()])
    plt.axis("off")

plt.show()


from pytorch_grad_cam import GradCAMPlusPlus

# Layer whose activations and gradients the library implementation will use
target_layers = [model._modules["layer4"]]
# GradCAMPlusPlus object from pytorch_grad_cam, bound to this model and layer
cam_plus = GradCAMPlusPlus(model=model, target_layers=target_layers)


from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

# Grad-CAM++ maps for the six top classes
plt.figure(figsize=(15, 10))

for i in range(6):
    class_idx = top_inds[0][i]
    target = [ClassifierOutputTarget(class_idx)]
    cam_map = cam_plus(targets=target, input_tensor=img_t)
    # The call returns one map per batch item; take the first
    heatmap = cam_map[0]

    plt.subplot(2, 3, i + 1)
    plt.imshow(img_arr)
    plt.imshow(heatmap, cmap="jet", alpha=0.6, interpolation="bilinear")
    plt.title(idx2label[class_idx.item()])
    plt.axis("off")

plt.show()


!pip install -q transformers[sentencepiece]


import transformers
from transformers import BertTokenizerFast, AutoModelForSequenceClassification
from IPython.display import clear_output

# Tokenizer and classifier are loaded from the same pretrained checkpoint
tokenizer = BertTokenizerFast.from_pretrained(
    "blanchefort/rubert-base-cased-sentiment",
)
# NOTE: this rebinds `model`, which earlier cells used for the image network
model = AutoModelForSequenceClassification.from_pretrained(
    "blanchefort/rubert-base-cased-sentiment",
    output_attentions=True,  # make the model return attention weights with its outputs
)
# Clear the cell output left by the loading calls
clear_output()


# Example sentences to classify
sentences = [
    "Мама мыла раму",
    "Фильм сделан откровенно плохо",
    "Максимально скучный сериал, где сюжет высосан из пальца",
    "Я был в восторге",
    "В общем, кино хорошее и есть много что пообсуждать",
]

# Token strings per sentence, wrapped in start/end markers used as plot labels
tokens = [["[cls]", *tokenizer.tokenize(sentence), "[sep]"] for sentence in sentences]


item = 0
print(f"Tokens: {tokens[item]}")
# One list of vocabulary ids per sentence, in the same order as `sentences`
token_ids = list(map(tokenizer.encode, sentences))
print(f"Token ids: {token_ids[item]}")

Tokens: ['[cls]', 'Мама', 'мыла', 'раму', '[sep]']
Token ids: [101, 10871, 49053, 53954, 102]


import torch


# Class index -> human-readable label used in the printout
ans = {0: "NEUTRAL", 1: "POSITIVE", 2: "NEGATIVE"}

# Inference only: no_grad avoids building an autograd graph for every sentence
with torch.no_grad():
    # Iterate over the sentence list itself instead of a hard-coded count
    for item, text in enumerate(sentences):
        # Batch of one sequence
        input_ids = torch.tensor([token_ids[item]])
        model_output = model(input_ids)
        # Index of the largest logit for the single batch row
        predicted = torch.argmax(model_output.logits, dim=1).numpy()
        print(f"Text: {text}")
        print(f"Predict lable = {predicted}, {ans[predicted.item()]}")

Text: Мама мыла раму
Predict lable = [0], NEUTRAL
Text: Фильм сделан откровенно плохо
Predict lable = [2], NEGATIVE
Text: Максимально скучный сериал, где сюжет высосан из пальца
Predict lable = [2], NEGATIVE
Text: Я был в восторге
Predict lable = [1], POSITIVE
Text: В общем, кино хорошее и есть много что пообсуждать
Predict lable = [0], NEUTRAL


# Re-run the model on one sentence and keep its attention tensors
item = 1
input_ids = torch.tensor([token_ids[item]])
model_output = model(input_ids)

attentions = model_output.attentions
print(f"Text: {sentences[item]}")
print(f"Tokens: {tokens[item]}")
print(f"Number of layers: {len(attentions)}")
print(
    "Attention size:",
    attentions[0].shape,
    "[batch x attention_heads x seq_size x seq_size]",
)

Text: Фильм сделан откровенно плохо
Tokens: ['[cls]', 'Фильм', 'сделан', 'откровенно', 'плохо', '[sep]']
Number of layers: 12
Attention size: torch.Size([1, 12, 6, 6]) [batch x attention_heads x seq_size x seq_size]


import numpy as np


def to_array(attentions):
    """Stack per-layer attention tensors into one array for the first batch item.

    Args:
        attentions: Iterable of tensors, one per layer, batch axis first.

    Returns:
        A NumPy array with the layer axis first and the batch axis removed
        (only batch index 0 is kept).
    """
    per_layer = []
    for layer_attention in attentions:
        per_layer.append(layer_attention.detach().numpy())
    stacked = np.asarray(per_layer)
    # Axis 0 is the layer, axis 1 the batch: keep the first batch element only
    return stacked[:, 0]


# Convert the attention tensors to a single array and report its layout
attentions_arr = to_array(attentions)
print(
    "Shape:",
    attentions_arr.shape,
    "[layers x attention_heads x seq_size x seq_size]",
)
print(f"Type: {type(attentions_arr)}, Dtype: {attentions_arr.dtype}")

Shape: (12, 12, 6, 6) [layers x attention_heads x seq_size x seq_size]
Type: <class 'numpy.ndarray'>, Dtype: float32


import seaborn as sns
import matplotlib.pyplot as plt

# Both axes are labelled with the tokens of the selected sentence
x_ticks = y_ticks = tokens[item]

# First layer, first head
sns.heatmap(
    data=attentions_arr[0, 0],
    cmap="YlOrRd",
    vmin=0,
    vmax=1,
    xticklabels=x_ticks,
    yticklabels=y_ticks,
)

plt.show()


!pip install -q bertviz

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 157.6/157.6 kB 4.9 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 139.3/139.3 kB 11.4 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.2/12.2 MB 44.4 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 82.2/82.2 kB 12.7 MB/s eta 0:00:00


import bertviz
from bertviz import head_view

# bertviz head view of the attention tensors for the selected sentence's tokens
head_view(model_output.attentions, tokens[item])


# Average over the attention-head axis (axis 1), leaving one map per layer
attention_head_mean = np.mean(attentions_arr, axis=1)
print(f"{attention_head_mean.shape} [layers x seq_size x seq_size]")

(12, 6, 6) [layers x seq_size x seq_size]


# Both axes are labelled with the tokens of the selected sentence
x_ticks = y_ticks = tokens[item]

# Head-averaged attention of the first layer
sns.heatmap(
    data=attention_head_mean[0],
    cmap="YlOrRd",
    vmin=0,
    vmax=1,
    xticklabels=x_ticks,
    yticklabels=y_ticks,
)

plt.show()


# Both axes are labelled with the tokens of the selected sentence
x_ticks = y_ticks = tokens[item]

# Head-averaged attention of the last layer
sns.heatmap(
    data=attention_head_mean[-1],
    cmap="YlOrRd",
    vmin=0,
    vmax=1,
    xticklabels=x_ticks,
    yticklabels=y_ticks,
)

plt.show()


# Columns: sentence tokens without the start/end markers
x_ticks = tokens[item][1:-1]
# Rows: layer numbers from 12 down to 1, matching the flipped data below
y_ticks = list(range(12, 0, -1))

# Row 0 of every layer (the "[cls]" position), restricted to the real tokens
cls_attention = attention_head_mean[:, 0, 1:-1]

sns.heatmap(
    data=cls_attention[::-1],
    cmap="YlOrRd",
    xticklabels=x_ticks,
    yticklabels=y_ticks,
)

plt.show()


def residual(attention_head_mean):
    """Blend each attention matrix equally with the identity matrix.

    Args:
        attention_head_mean: Array whose axis 1 has the sequence length; the
            identity matrix is broadcast over axis 0.

    Returns:
        ``0.5 * attention_head_mean + 0.5 * I`` with ``I`` broadcast over the
        leading axis.
    """
    seq_size = attention_head_mean.shape[1]
    # Leading size-1 axis lets the identity broadcast across axis 0
    identity = np.eye(seq_size)[np.newaxis, ...]
    return 0.5 * attention_head_mean + 0.5 * identity


# Attention maps blended with the identity matrix
attention_res = residual(attention_head_mean)

# Columns: sentence tokens without the start/end markers
x_ticks = tokens[item][1:-1]
# Rows: layer numbers from 12 down to 1, matching the flipped data below
y_ticks = list(range(12, 0, -1))

# Row 0 of every layer (the "[cls]" position), restricted to the real tokens
cls_attention = attention_res[:, 0, 1:-1]

sns.heatmap(
    data=cls_attention[::-1],
    cmap="YlOrRd",
    xticklabels=x_ticks,
    yticklabels=y_ticks,
)

plt.show()


def rollout(attention_res):
    """Accumulate attention across layers by successive matrix products.

    Args:
        attention_res: Array with the layer axis first; each ``attention_res[i]``
            is one layer's attention matrix.

    Returns:
        An array of the same shape where entry 0 equals ``attention_res[0]``
        and entry ``i`` is ``attention_res[i]`` multiplied by entry ``i - 1``.
    """
    accumulated = np.zeros(attention_res.shape)
    accumulated[0] = attention_res[0]
    # Each layer is composed with the result already accumulated below it
    for layer_idx, layer_attention in enumerate(attention_res[1:], start=1):
        accumulated[layer_idx] = np.dot(layer_attention, accumulated[layer_idx - 1])
    return accumulated


# Compose the identity-blended attention maps layer by layer
rollout_attention = rollout(attention_res)


# Columns: sentence tokens without the start/end markers
x_ticks = tokens[item][1:-1]
# Rows: layer numbers from 12 down to 1, matching the flipped data below
y_ticks = list(range(12, 0, -1))

# Row 0 of every layer (the "[cls]" position), restricted to the real tokens
cls_rollout = rollout_attention[:, 0, 1:-1]

sns.heatmap(
    data=cls_rollout[::-1],
    cmap="YlOrRd",
    xticklabels=x_ticks,
    yticklabels=y_ticks,
)

plt.show()

	CRIM	ZN	INDUS	NOX	RM	AGE	DIS	RAD	TAX	PTRATIO	B	LSTAT	target
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1.0	296.0	15.3	396.90	4.98	24.0
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2.0	242.0	17.8	396.90	9.14	21.6
2	0.02729	0.0	7.07	0.469	7.185	61.1	4.9671	2.0	242.0	17.8	392.83	4.03	34.7
3	0.03237	0.0	2.18	0.458	6.998	45.8	6.0622	3.0	222.0	18.7	394.63	2.94	33.4
4	0.06905	0.0	2.18	0.458	7.147	54.2	6.0622	3.0	222.0	18.7	396.90	5.33	36.2

	CRIM	ZN	INDUS	CHAS	NOX	RM	AGE	DIS	RAD	TAX	PTRATIO	B	LSTAT	target
count	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000	506.000000
mean	3.613524	11.363636	11.136779	0.069170	0.554695	6.284634	68.574901	3.795043	9.549407	408.237154	18.455534	356.674032	12.653063	22.532806
std	8.601545	23.322453	6.860353	0.253994	0.115878	0.702617	28.148861	2.105710	8.707259	168.537116	2.164946	91.294864	7.141062	9.197104
min	0.006320	0.000000	0.460000	0.000000	0.385000	3.561000	2.900000	1.129600	1.000000	187.000000	12.600000	0.320000	1.730000	5.000000
25%	0.082045	0.000000	5.190000	0.000000	0.449000	5.885500	45.025000	2.100175	4.000000	279.000000	17.400000	375.377500	6.950000	17.025000
50%	0.256510	0.000000	9.690000	0.000000	0.538000	6.208500	77.500000	3.207450	5.000000	330.000000	19.050000	391.440000	11.360000	21.200000
75%	3.677083	12.500000	18.100000	0.000000	0.624000	6.623500	94.075000	5.188425	24.000000	666.000000	20.200000	396.225000	16.955000	25.000000
max	88.976200	100.000000	27.740000	1.000000	0.871000	8.780000	100.000000	12.126500	24.000000	711.000000	22.000000	396.900000	37.970000	50.000000

	name	coef
12	LSTAT	-3.743627
7	DIS	-3.104044
5	RM	2.674230
8	RAD	2.662218
9	TAX	-2.076782
10	PTRATIO	-2.060607
4	NOX	-2.056718
1	ZN	1.081569
0	CRIM	-0.928146
11	B	0.849268
3	CHAS	0.681740
2	INDUS	0.140900
6	AGE	0.019466

	name	feature importances
12	LSTAT	0.450044
5	RM	0.362574
7	DIS	0.062982
0	CRIM	0.034634
4	NOX	0.021108
10	PTRATIO	0.016098
9	TAX	0.014828
6	AGE	0.014450
11	B	0.010778
2	INDUS	0.006308
8	RAD	0.003774
1	ZN	0.001534
3	CHAS	0.000888

	CRIM	ZN	INDUS	NOX	RM	AGE	DIS	RAD	TAX	PTRATIO	B	LSTAT	target
0	0.00632	18.0	2.31	0.538	6.575	65.2	4.0900	1.0	296.0	15.3	396.90	4.98	24.0
1	0.02731	0.0	7.07	0.469	6.421	78.9	4.9671	2.0	242.0	17.8	396.90	9.14	21.6
2	0.02729	0.0	7.07	0.469	7.185	61.1	4.9671	2.0	242.0	17.8	392.83	4.03	34.7
3	0.03237	0.0	2.18	0.458	6.998	45.8	6.0622	3.0	222.0	18.7	394.63	2.94	33.4
4	0.06905	0.0	2.18	0.458	7.147	54.2	6.0622	3.0	222.0	18.7	396.90	5.33	36.2

Мотивация использования Explainability¶

Объяснимость моделей классического ML¶

Оценка важности признаков для линейных моделей¶

Пример для табличных данных (Boston Dataset)¶

Оценка важности признаков для деревьев решений¶

Пример для табличных данных (Boston Dataset)¶

Методы, изучающие отклик модели на изменение входных данных¶

ICE (Individual Conditional Expectation)¶

Пример для табличных данных (Boston Dataset)¶

LIME (Local Interpretable Model-agnostic Explanations)¶

Принцип работы¶

Описание алгоритма¶

Как получить набор объектов вблизи искомого?¶

Ограничения¶

Пример NLP (классификация статей)¶

Пример для изображений (ResNet18)¶

Идея¶

Анализ ResNet18¶

SHAP (SHapley Additive exPlanations)¶

Принцип работы¶

Kernel SHAP¶

Пример для табличных данных (Boston Dataset)¶

Пример NLP (перевод с английского на русский)¶

Пример NLP (абстрактное обобщение текста)¶

Градиентные методы¶

Vanilla Gradient¶

Идея метода¶

Пример изображения (ResNet18)¶

Проблема насыщения (saturation)¶

Adversarial attacks¶

SmoothGrad¶

Идея метода¶

Пример изображения (ResNet18)¶

Integrated Gradients¶

Идея метода¶

Пример изображения (ResNet18)¶

Grad-CAM¶

Идея метода¶

Пример изображения (ResNet18)¶

Критика градиентных методов¶

Методы, специфичные для трансформеров¶

Attention rollout¶

Attention Flow¶

Gradient-weighted Attention Rollout¶

Заключение¶