import matplotlib.pyplot as plt
import numpy as np


# Synthetic 1-D regression data: y = 2 + 3x plus uniform noise in [-0.5, 0.5).
np.random.seed(0)
x = np.random.rand(100, 1)
y = 2 + 3 * x + (np.random.rand(100, 1) - 0.5)

# Quick look at the raw point cloud.
plt.figure(figsize=(5, 3))
plt.scatter(x, y, s=10)
plt.gca().set(xlabel="x", ylabel="y")
plt.show()


# Overlay a grid of candidate lines y = b + m * x on top of the data.
plt.figure(figsize=(5, 3))
plt.scatter(x, y, s=10)
slopes = np.arange(-5.0, 7.0, 1)
intercepts = [-1, 0, 1, 2, 3]
for m in slopes:
    for b in intercepts:
        y_predicted = b + m * x
        plt.plot(x, y_predicted, color="r", alpha=0.3)
plt.show()


# Same grid of candidate lines, plus the noise-free generating line in green.
plt.figure(figsize=(5, 3))
plt.scatter(x, y, s=10)
slopes = np.arange(-5.0, 7.0, 1)
intercepts = [-1, 0, 1, 2, 3]
for m in slopes:
    for b in intercepts:
        y_predicted = b + m * x
        plt.plot(x, y_predicted, color="r", alpha=0.3)
true_line = 2 + 3 * x
plt.plot(x, true_line, color="g")
plt.show()


# Mean squared error of the hand-picked candidate line y = 4x + 2.
residuals = y - (4 * x + 2)
loss = np.sum(residuals**2) / len(x)
print(f"loss value = {loss:.05f}")

loss value = 0.36800


from sklearn.metrics import mean_squared_error, r2_score


def list_mult(x, y):
    """Return the pairwise products of ``x`` and ``y`` as a list.

    Pairs are formed with ``zip``, so the result is as long as the shorter
    input.
    """
    products = []
    for left, right in zip(x, y):
        products.append(left * right)
    return products


def estimate_coef(x, y):
    """Fit ``y ~ intercept + slope * x`` by closed-form ordinary least squares.

    Args:
        x: sequence of feature values.
        y: sequence of target values, same length as ``x``.

    Returns:
        Tuple ``(intercept, slope)``.

    Raises:
        ValueError: if ``x`` and ``y`` have different lengths.
    """
    n = len(x)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if n != len(y):
        raise ValueError(f"x and y must have the same length, got {n} and {len(y)}")

    sum_x = sum(x)
    sum_y = sum(y)
    sum_xy = sum(xi * yi for xi, yi in zip(x, y))
    sum_xx = sum(xi * xi for xi in x)

    # The denominator is zero when every x value is identical.
    slope = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x**2)
    intercept = (sum_y - slope * sum_x) / n
    return (intercept, slope)


coefs = estimate_coef(x, y)  # (intercept, slope)

y_predicted = coefs[0] + coefs[1] * x

print("Estimated coefficients:\nb_0 = {} \nb_1 = {}".format(coefs[0], coefs[1]))
# mean_squared_error is called without taking a square root, so the value is
# the MSE and is labelled as such (the old label claimed it was the RMSE).
print("Mean squared error: ", mean_squared_error(y, y_predicted))
print("R2 score: ", r2_score(y, y_predicted))

# Data points with the fitted line on top.
plt.figure(figsize=(5, 3))
plt.scatter(x, y, s=10)
plt.plot(x, y_predicted, color="r")
plt.xlabel("x")
plt.ylabel("y")
plt.show()

Estimated coefficients:
b_0 = [2.05808002] 
b_1 = [2.93655106]
Root mean squared error:  0.0762332458287501
R2 score:  0.9038655568672764


import pandas as pd

dataset = pd.read_csv(
    "https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/student_scores.csv"
)
print(dataset.shape)
dataset.head()

(25, 2)


import seaborn as sns


sns.jointplot(data=dataset, x="Scores", y="Hours", height=5)
plt.show()


from sklearn.model_selection import train_test_split

x = dataset.iloc[:, :-1].values  # column Hours
y = dataset.iloc[:, 1].values  # column Score

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)


from sklearn.linear_model import LinearRegression

regressor = LinearRegression()


regressor.fit(x_train, y_train)

LinearRegression()

LinearRegression()


x_train.shape

(20, 1)


x_points = np.linspace(min(x_train), max(x_train), 100)  # 100 dots at min to max
y_pred = regressor.predict(x_points)

plt.figure(figsize=(6, 4))
plt.plot(x_train, y_train, "o", label="Scores")
plt.plot(
    x_points,
    y_pred,
    label="y = %.2fx+%.2f" % (regressor.coef_[0], regressor.intercept_),
)
plt.title("Hours vs Percentage", size=12)
plt.xlabel("Hours Studied", size=12)
plt.ylabel("Percentage Score", size=12)
plt.legend()
plt.show()


# Evaluate the fitted line on 100 evenly spaced points spanning the test inputs.
# (A prediction on x_test used to be computed here and was overwritten at once.)
x_points = np.linspace(min(x_test), max(x_test), 100)
y_pred = regressor.predict(x_points)

plt.figure(figsize=(6, 4))
plt.plot(x_test, y_test, "o", label="Scores")
plt.plot(
    x_points,
    y_pred,
    label="y = %.2fx+%.2f" % (regressor.coef_[0], regressor.intercept_),
)
plt.title("Hours vs Percentage", size=12)
plt.xlabel("Hours Studied", size=12)
plt.ylabel("Percentage Score", size=12)
plt.legend()
plt.show()


from sklearn import metrics

# Score the regressor on the held-out samples.
y_pred = regressor.predict(x_test)

mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Mean Absolute Error: %9.2f" % mae)
print("Mean Squared Error: %10.2f" % mse)
print("Root Mean Squared Error: %5.2f" % rmse)

Mean Absolute Error:      3.92
Mean Squared Error:      18.94
Root Mean Squared Error:  4.35


import sklearn.datasets

cancer = sklearn.datasets.load_breast_cancer()  # load data
x = cancer.data  # features
y = cancer.target  # labels(classes)
print(f"x shape: {x.shape}, y shape: {y.shape}")
print(f"x[0]: \n {x[0]}")
print(f"y[0]: \n {y[0]}")

x shape: (569, 30), y shape: (569,)
x[0]: 
 [1.799e+01 1.038e+01 1.228e+02 1.001e+03 1.184e-01 2.776e-01 3.001e-01
 1.471e-01 2.419e-01 7.871e-02 1.095e+00 9.053e-01 8.589e+00 1.534e+02
 6.399e-03 4.904e-02 5.373e-02 1.587e-02 3.003e-02 6.193e-03 2.538e+01
 1.733e+01 1.846e+02 2.019e+03 1.622e-01 6.656e-01 7.119e-01 2.654e-01
 4.601e-01 1.189e-01]
y[0]: 
 0


pd.DataFrame(x).describe()


plt.figure(figsize=(6, 4))
ax = sns.boxenplot(data=pd.DataFrame(x), orient="h", palette="Set2")
ax.set(xscale="log", xlim=(1e-4, 1e4), xlabel="Values", ylabel="Features")
plt.show()


from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler


# Compare the first feature before and after three scikit-learn scalers.
test = x[:, 0].reshape(-1, 1)

# (subplot position, title, scaler or None for the raw data)
panels = [
    (221, "Non scaled data", None),
    (222, "MinMaxScaler", MinMaxScaler()),
    (223, "StandardScaler", StandardScaler()),
    (224, "RobustScaler", RobustScaler()),
]

plt.figure(1, figsize=(10, 7))
for position, title, scaler in panels:
    test_scaled = test if scaler is None else scaler.fit_transform(test)
    plt.subplot(position)  # set location
    plt.scatter(test_scaled, range(len(test)), c=y, s=15)
    if position in (221, 223):  # only the left column carries a y label
        plt.ylabel("Num examples", fontsize=10)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    plt.title(title, fontsize=12)
plt.show()


x_norm = StandardScaler().fit_transform(x)  # scaled data


pd.DataFrame(x_norm).describe()


plt.figure(figsize=(6, 4))
ax = sns.boxenplot(data=pd.DataFrame(x_norm), orient="h", palette="Set2")
ax.set(xlabel="Values", ylabel="Features")
plt.show()


cancer_df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)

# Compute the correlation matrix
corr = cancer_df.corr()

# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(8, 6))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=0.3,
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={"shrink": 0.5},
)
plt.show()


def generate_data(total_len=40):
    """Draw a two-class 1-D toy set of masses.

    The first ``total_len // 2`` values are uniform in [14, 21) with label 0,
    the remaining ``total_len // 2`` are uniform in [24, 33) with label 1.

    Returns:
        Tuple ``(x, y)`` of 1-D arrays holding the values and their labels.
    """
    half = total_len // 2
    normal_mass = np.random.uniform(14, 21, half)
    obese_mass = np.random.uniform(24, 33, half)
    x = np.hstack([normal_mass, obese_mass])
    y = np.hstack([np.zeros(half), np.ones(half)])
    return x, y


def plot_data(x, y, total_len=40, s=50, threshold=None, margin=None):
    """Scatter 1-D samples on a horizontal line, coloured by class.

    Args:
        x: 1-D values to plot along the x axis.
        y: class label per value, used as the hue.
        total_len: unused; kept so existing calls keep working.
        s: marker size (scalar or one size per point).
        threshold: optional x position of a red dashed decision line.
        margin: optional iterable of x positions drawn as pink dashed lines.

    Returns:
        The matplotlib Axes that was drawn on.
    """
    ax = sns.scatterplot(x=x, y=np.zeros(len(x)), hue=y, s=s)
    # Compare against None so a threshold of 0 is still drawn and an array
    # passed as `margin` does not hit an ambiguous truth-value test.
    if threshold is not None:
        ax.axvline(threshold, color="red", ls="dashed")
    if margin is not None:
        for line in margin:
            ax.axvline(line, color="pink", ls="dashed")
    handles, _ = ax.get_legend_handles_labels()
    ax.legend(handles, ["Normal", "Obese"])
    ax.set(xlabel="Mass, g")
    return ax


total_len = 40
x, y = generate_data(total_len=total_len)
plt.figure(figsize=(5, 3))
ax = plot_data(x, y, threshold=21.5, total_len=total_len)


x_test = np.random.uniform(14, 30, 5)


def classify(x, threshold=21.5):
    """Label each value 1 when it is strictly above ``threshold``, else 0.

    The result has the same shape and dtype as ``x``.
    """
    labels = np.zeros_like(x)
    above = x > threshold
    labels[above] = 1
    return labels


total_len = 40
threshold = 21.5
x, y = generate_data(total_len=total_len)
plt.figure(figsize=(5, 3))
ax = plot_data(x, y, threshold=threshold, total_len=total_len)
ax = plot_data(x_test, classify(x_test, threshold), total_len=total_len, s=300)


x_test = np.array([21.45, 22.5])

total_len = 40
x, y = generate_data(total_len=total_len)
plt.figure(figsize=(5, 3))
ax = plot_data(x, y, total_len=total_len)
ax = plot_data(x_test, classify(x_test), threshold=21.5, total_len=total_len, s=300)


x, y = generate_data(total_len=total_len)
normal_limit = x[y == 0].max()  # largest value labelled 0
obese_limit = x[y == 1].min()  # smallest value labelled 1

# Put the decision threshold halfway between the two extreme points.
threshold = (normal_limit + obese_limit) / 2

x_test = np.array([21.5, 23])
test_labels = classify(x_test, threshold=threshold)

plt.figure(figsize=(5, 3))
# Training points with the threshold and both class limits marked.
ax = plot_data(
    x, y, total_len=total_len, threshold=threshold, margin=[normal_limit, obese_limit]
)
# Test points drawn larger on the same axes.
ax = plot_data(
    x_test,
    test_labels,
    total_len=total_len,
    s=300,
    threshold=threshold,
)


margins = np.abs(x_test - threshold)
print(margins)

[0.45841656 1.04158344]


margin_0 = np.abs(normal_limit - threshold)
margin_1 = np.abs(obese_limit - threshold)
print(margin_0, margin_1)

2.3598453649025473 2.3598453649025473


def generate_realistic_data(total_len=40):
    """Draw the two-class 1-D toy set with one deliberately mislabelled point.

    Values are generated as uniform [14, 21) for class 0 and uniform [24, 33)
    for class 1. The smallest class-1 value is then relabelled as class 0 and
    given marker size 300; every other point gets size 50.

    Returns:
        Tuple ``(x, y, s)`` of values, labels and marker sizes.
    """
    half = total_len // 2
    x = np.hstack(
        [
            np.random.uniform(14, 21, half),
            np.random.uniform(24, 33, half),
        ]
    )
    y = np.hstack([np.zeros(half), np.ones(half)])

    # The class-1 samples occupy the second half of x.
    lightest_obese = x[half:].min()
    indx = np.where(x == lightest_obese)[0]
    y[indx] = 0

    s = np.full(x.shape, 50.0)
    s[indx] = 300
    return x, y, s


total_len = 40
x, y, s = generate_realistic_data(total_len=total_len)
plt.figure(figsize=(5, 3))
ax = plot_data(x, y, total_len=total_len, s=s)


from sklearn import svm
from sklearn.datasets import make_blobs


def generate_2d_data(total_len=40):
    """Generate two 2-D blobs with ``make_blobs`` and shift them by (+10, +20).

    Returns:
        Tuple ``(x, y)``: the shifted samples and their blob labels.
    """
    x, y = make_blobs(n_samples=total_len, centers=2, random_state=42)
    x += np.array([10, 20])  # per-column offsets
    return x, y


def plot_data(x, y, total_len=40, s=50, threshold=21.5):
    """Scatter 2-D samples (column 0 against column 1), coloured by class.

    ``total_len`` and ``threshold`` are accepted but not used.

    Returns:
        The matplotlib Axes that was drawn on.
    """
    mass, length = x[:, 0], x[:, 1]
    ax = sns.scatterplot(x=mass, y=length, hue=y, s=s)
    legend_handles = ax.get_legend_handles_labels()[0]
    ax.legend(legend_handles, ["Normal", "Obese"])
    ax.set(xlabel="Mass, g", ylabel="Length, cm")
    return ax


total_len = 40
x, y = generate_2d_data(total_len=total_len)
plt.figure(figsize=(5, 3))
ax = plot_data(x, y, total_len=total_len)

# Illustration only (explained later): linear SVM, not regularized on purpose.
clf = svm.SVC(kernel="linear", C=1000)
clf.fit(x, y)

# Draw the decision function over the area currently shown by the axes.
ax = plt.gca()
xlim, ylim = ax.get_xlim(), ax.get_ylim()

# 30x30 evaluation grid; rows follow xx, columns follow yy.
xx = np.linspace(*xlim, 30)
yy = np.linspace(*ylim, 30)
XX, YY = np.meshgrid(xx, yy, indexing="ij")
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)

# Level 0 is the decision boundary, levels -1 and +1 are the margins.
ax.contour(
    XX, YY, Z, colors="k", levels=[-1, 0, 1], alpha=0.5, linestyles=["--", "-", "--"]
)
# Circle the support vectors.
support = clf.support_vectors_
ax.scatter(
    support[:, 0],
    support[:, 1],
    s=100,
    linewidth=1,
    facecolors="none",
    edgecolors="k",
)
plt.show()


def generate_3d_data(total_len=40):
    """Generate two 3-D blobs with ``make_blobs`` and shift them by (+10, +20, +10).

    Returns:
        Tuple ``(x, y)``: the shifted samples and their blob labels.
    """
    x, y = make_blobs(n_samples=total_len, centers=2, random_state=42, n_features=3)
    x += np.array([10, 20, 10])  # per-column offsets
    return x, y


def plot_data(x, y, total_len=40, s=50, threshold=21.5):
    """Scatter 3-D samples in a new figure together with an illustrative surface.

    Args:
        x: array whose columns 0, 1 and 2 are plotted on the x, y and z axes.
        y: class label per sample, used for the colours.
        total_len: unused; kept so existing calls keep working.
        s: marker size.
        threshold: unused; kept so existing calls keep working.

    Returns:
        The 3-D matplotlib Axes that was drawn on.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    points = ax.scatter(xs=x[:, 0], ys=x[:, 1], zs=x[:, 2], c=y, s=s, cmap="Set1")

    # Grid over the x/y extent of the axes for the illustrative surface.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    xx = np.linspace(xlim[0], xlim[1], 30)
    yy = np.linspace(ylim[0], ylim[1], 30)
    YY, XX = np.meshgrid(yy, xx)
    ax.plot_surface(XX, YY, XX * YY * 0.2, alpha=0.2)

    # The scatter has no `label`, so get_legend_handles_labels() would return
    # nothing and the legend would be empty; take one handle per colour from
    # the scatter itself instead.
    handles, _ = points.legend_elements()
    ax.legend(handles, ["Normal", "Obese"])
    ax.set(xlabel="Mass, g", ylabel="Length, cm", zlabel="Age, days")
    return ax


total_len = 40
x, y = generate_3d_data(total_len=total_len)
ax = plot_data(x, y, total_len=total_len)


# Score of one class for a 4-pixel "image": weighted sum of pixels plus a bias.
img = np.array([56, 231, 24, 2])
w_cat = np.array([0.2, -0.5, 0.1, 2.0])
weighted = img * w_cat  # element-wise products
score = weighted.sum()

print("Image ", img)
print("Weights ", w_cat)
print("img * w_cat ", weighted)
print("sum ", score)
print("Add bias ", score + 1.1)

Image  [ 56 231  24   2]
Weights  [ 0.2 -0.5  0.1  2. ]
img * w_cat  [  11.2 -115.5    2.4    4. ]
sum  -97.89999999999999
Add bias  -96.8


import matplotlib.pyplot as plt

x = [i for i in range(-5, 6)]
y = [abs(i) for i in range(-5, 6)]

plt.figure(figsize=(6, 4))
plt.plot(x, y, label="y = |x|")
plt.title("y = |x|", size=20)
plt.legend()
plt.show()


# Piecewise-constant plot: -1 on [-5, 0] and +1 on [0, 5], with markers at x = 0.
x = list(range(-5, 1))
y = [-1] * 6
x_1 = list(range(0, 6))
y_1 = [1] * 6

plt.figure(figsize=(6, 4))
plt.plot(x, y, "b")
plt.plot(x_1, y_1, "b")
# Red marker at (0, 0), blue markers at the ends of the two branches.
plt.plot(0, 0, "ro")
plt.plot(0, 1, "bo")
plt.plot(0, -1, "bo")
plt.show()


import numpy as np


# Toy regression set: each target is the sum of cubed features plus small noise.
np.random.seed(101)
x_train = np.random.rand(1000, 5)
noise = np.random.rand(1000, 1) / 1000
cubes_sum = (x_train**3).sum(axis=-1, keepdims=True)  # shape (1000, 1)
y_train = cubes_sum + noise

print(f"x_train shape: {x_train.shape}, y_train shape: {y_train.shape}")

x_train shape: (1000, 5), y_train shape: (1000, 1)


import random


class LinearRegressor:
    """Linear regression trained with mini-batch gradient descent on MSE loss.

    The bias is stored as the last row of ``W``: inputs get a trailing 1
    appended before they are multiplied by ``W``.
    """

    def __init__(self, in_features, out_features, batch_size, random_state=42):
        """Create the model with small random weights.

        Args:
            in_features: number of input features.
            out_features: number of regression targets.
            batch_size: number of examples per gradient step.
            random_state: seed passed to ``np.random.seed`` (this reseeds
                NumPy's global generator).
        """
        self.in_features = in_features  # num of inputs
        self.out_features = out_features  # num of outputs

        np.random.seed(random_state)
        # One extra row for the bias term.
        self.W = np.random.randn(self.in_features + 1, self.out_features) * 0.0001
        self.batch_size = batch_size

    def fit(self, x_train, y_train, learning_rate=1e-8):
        """Run one epoch of mini-batch gradient descent.

        Args:
            x_train: array of shape (n_examples, in_features).
            y_train: targets with one row per example.
            learning_rate: step size of each update.

        Returns:
            Mean squared-error loss per example over the epoch, using the
            weights each batch saw before its update.
        """
        train_len = x_train.shape[0]
        indexes = list(range(train_len))
        random.shuffle(indexes)

        total_loss = 0.0
        for start in range(0, train_len, self.batch_size):
            idx = indexes[start : start + self.batch_size]
            # Append the constant bias input.
            x_batch = np.hstack([x_train[idx], np.ones((len(idx), 1))])
            y_batch = y_train[idx]

            loss_val, grad = self.loss(x_batch, y_batch)
            self.W -= learning_rate * grad

            # loss_val is a per-example mean, so weight it by the batch size;
            # summing the means and dividing by train_len would shrink the
            # reported loss by roughly the batch size.
            total_loss += loss_val * len(idx)
        return total_loss / train_len

    def loss(self, x, y):
        """Compute the MSE loss and its gradient for one batch.

        Args:
            x: batch of inputs with the bias column already appended,
                shape (batch, in_features + 1).
            y: targets, one row (or scalar) per example.

        Returns:
            Tuple ``(loss, dW)``: the sum of squared residuals divided by the
            batch size, and its gradient with the same shape as ``W``.
        """
        current_batch_size = x.shape[0]
        y = np.asarray(y).reshape(current_batch_size, -1)
        residual = x.dot(self.W) - y  # (batch, out_features)
        loss = np.sum(residual**2) / current_batch_size
        # dW_{m, n} = 2 * x_m * ((xW)_n - y_n), averaged over the batch.
        # (The earlier meshgrid-based version only tiled the residual and
        # dropped both the x_m factor and the 2.)
        dW = 2 * x.T.dot(residual) / current_batch_size
        return loss, dW

    def forward(self, x):
        """Predict the outputs for a single example (1-D feature vector)."""
        x = np.append(x, 1)  # add 1 (bias)
        scores = x.dot(self.W)
        return scores


from tqdm.notebook import tqdm


def train_and_plot_history(learning_rates_list, batch_size=64, num_epochs=20):
    """Train one LinearRegressor per learning rate and plot the loss curves.

    Uses the module-level ``x_train`` / ``y_train`` as training data.

    Args:
        learning_rates_list: learning rates to compare, one curve each.
        batch_size: mini-batch size given to every regressor.
        num_epochs: number of epochs to train each regressor.
    """
    fig, ax = plt.subplots(1, 1, figsize=(8, 4))
    # Wrap the list itself so tqdm knows the total number of runs.
    for lr in tqdm(learning_rates_list):
        # batch_size used to be hard-coded to 42 here, ignoring the argument.
        regressor = LinearRegressor(
            in_features=x_train.shape[1],
            out_features=y_train.shape[1],
            batch_size=batch_size,
        )
        loss_hist = np.zeros(num_epochs)
        for epoch in range(num_epochs):
            loss_hist[epoch] = regressor.fit(x_train, y_train, learning_rate=lr)
        ax.plot(loss_hist, label="lr={:0.1e}".format(lr))

    ax.set_xticks(range(num_epochs))
    ax.set_xlabel("epoch")
    ax.set_ylabel("MSE loss")
    plt.legend(bbox_to_anchor=(1.05, 1.0), loc="upper left")
    plt.show()


train_and_plot_history(learning_rates_list=np.linspace(start=5e-4, stop=7e-4, num=4))

0it [00:00, ?it/s]


train_and_plot_history(learning_rates_list=np.linspace(start=5e-9, stop=7e-9, num=4))

0it [00:00, ?it/s]


train_and_plot_history(learning_rates_list=np.linspace(start=0.5, stop=0.55, num=4))

0it [00:00, ?it/s]


x = np.linspace(0, 2 * np.pi, 10)
y = np.sin(x) + np.random.normal(scale=0.25, size=len(x))
x_true = np.linspace(0, 2 * np.pi, 200)
y_true = np.sin(x_true)

plt.figure(figsize=(5, 3))
plt.scatter(x, y, s=50, facecolors="none", edgecolors="b", label="noisy data")
plt.plot(x_true, y_true, c="lime", label="ground truth")
plt.legend()
plt.show()


from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

# Turn the 1-D inputs into an (n, 1) column for the pipeline.
x_train = x.reshape(-1, 1)

fig = plt.figure(figsize=(10, 5))

# Fit one polynomial regression per degree and draw each in its own 2x2 subplot.
for i, degree in enumerate([0, 1, 3, 9]):
    model = make_pipeline(PolynomialFeatures(degree), LinearRegression())

    model.fit(x_train, y)
    y_plot = model.predict(x_true.reshape(-1, 1))  # prediction on the dense x grid

    fig.add_subplot(2, 2, i + 1)
    plt.plot(x_true, y_plot, c="red", label=f"M={degree}")  # fitted polynomial
    plt.scatter(x, y, s=50, facecolors="none", edgecolors="b")  # noisy samples
    plt.plot(x_true, y_true, c="lime")  # noise-free sine
    plt.legend()
plt.show()
# NOTE: `degree` (last value 9), `model` and `y_plot` stay bound at module level
# after the loop.


from sklearn.linear_model import Ridge

# Degree shared by both pipelines, the plot labels and the printed equations.
# Defined here so the cell no longer depends on a loop variable left over from
# an earlier cell.
degree = 9

model = make_pipeline(PolynomialFeatures(degree), LinearRegression())
model_ridge = make_pipeline(PolynomialFeatures(degree), Ridge(alpha=0.1))

model.fit(x_train, y)
y_plot = model.predict(x_true.reshape(-1, 1))

model_ridge.fit(x_train, y)
y_plot_ridge = model_ridge.predict(x_true.reshape(-1, 1))

plt.figure(figsize=(5, 3))
plt.plot(x_true, y_plot, c="red", label=f"M={degree}")
plt.plot(x_true, y_plot_ridge, c="black", label=f"M={degree}, alpha=0.1")
plt.scatter(x, y, s=50, facecolors="none", edgecolors="b")
plt.plot(x_true, y_true, c="lime", label="ground truth")
plt.legend()
plt.show()

# Print the fitted polynomial of each pipeline; step 1 is the linear model.
for title, fitted in (
    ("Without regularization: ", model),
    ("With regularization: ", model_ridge),
):
    poly_coef = fitted[1].coef_
    eq = f"y = {round(poly_coef[0], 2)}+{round(poly_coef[1], 2)}*x"
    for i in range(2, degree + 1):
        eq += f"+{round(poly_coef[i], 2)}*x^{i}"
    print(title, eq)

Without regularization:  y = 0.0+7.3*x+-21.31*x^2+25.13*x^3+-14.79*x^4+4.69*x^5+-0.8*x^6+0.06*x^7+-0.0*x^8+-0.0*x^9
With regularization:  y = 0.0+0.24*x+0.16*x^2+0.15*x^3+-0.01*x^4+-0.23*x^5+0.15*x^6+-0.04*x^7+0.0*x^8+-0.0*x^9


from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np


def generate_patients_data(total_len=40):
    """Draw ``total_len`` doses uniformly from [0, 50) and label them.

    A dose gets label 1 when it lies strictly between 15 and 35, else 0.

    Returns:
        Tuple ``(x, y)`` of doses and float labels.
    """
    doses = np.random.uniform(0, 50, total_len)
    in_window = (doses > 15) & (doses < 35)
    return doses, in_window.astype(doses.dtype)


def plot_data(x, y, total_len=40, s=50):
    """Scatter 1-D doses on a horizontal line in a new figure, coloured by label.

    ``total_len`` is accepted but not used.

    Returns:
        The matplotlib Axes that was drawn on.
    """
    plt.figure(figsize=(5, 3))
    baseline = np.zeros(len(x))
    ax = sns.scatterplot(x=x, y=baseline, hue=y, s=s)
    legend_handles = ax.get_legend_handles_labels()[0]
    ax.legend(legend_handles, ["Recover", "Sick"])
    ax.set(xlabel="dose, mg")
    return ax


total_len = 40
x, y = generate_patients_data(total_len=total_len)
ax = plot_data(x, y, total_len=total_len)
plt.show()


def plot_data(x, y, total_len=40, s=50):
    """Scatter row 0 of ``x`` against row 1 in a new figure, coloured by label.

    ``total_len`` is accepted but not used.

    Returns:
        The matplotlib Axes that was drawn on.
    """
    plt.figure(figsize=(5, 3))
    dose, dose_squared = x[0, :], x[1, :]
    ax = sns.scatterplot(x=dose, y=dose_squared, hue=y, s=s)
    legend_handles = ax.get_legend_handles_labels()[0]
    ax.legend(legend_handles, ["Recover", "Sick"])
    ax.set(xlabel="Dose, mg")
    ax.set(ylabel="Dose$^2$")
    return ax


total_len = 40
x_1, y = generate_patients_data(total_len=total_len)
x_2 = x_1**2
x = np.vstack([x_1, x_2])

plot_data(x, y, total_len=40, s=50)
plt.show()


plot_data(x, y, total_len=40, s=50)

x_arr = np.linspace(0, 50, 50)
xs = [x[0, :][y == 1].min(), x[0, :][y == 1].max()]
ys = [x[1, :][y == 1].min(), x[1, :][y == 1].max()]

# Calculate the coefficients.
coefficients = np.polyfit(xs, ys, 1)

# Let's compute the values of the line...
polynomial = np.poly1d(coefficients)
y_axis = polynomial(x_arr)

# ...and plot the points and the line
plt.plot(x_arr, y_axis, "r--")
plt.show()


import sklearn
from sklearn.datasets import load_wine

# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_wine.html#sklearn.datasets.load_wine

# Download dataset
# Download dataset
features, class_labels = load_wine(
    return_X_y=True, as_frame=True
)  # also we can get data in Bunch (dictionary) or pandas DataFrame

# Work on a copy: assigning the "target" column to `features` itself would put
# the labels inside the feature matrix that is later used for training.
wine_dataset = features.copy()
wine_dataset["target"] = class_labels

wine_dataset.head()


import seaborn as sns
import matplotlib.pyplot as plt


sns.displot(wine_dataset, x="alcohol", kind="kde", fill=True, height=4)
plt.show()


wine_dataset.target.unique()

array([0, 1, 2])


sns.displot(wine_dataset, x="alcohol", hue="target", kind="kde", fill=True, height=4)
plt.show()


from sklearn.model_selection import train_test_split

# Split the data into train and test data
x_train, x_test, y_train, y_test = train_test_split(
    features.values, class_labels.values, test_size=0.25, random_state=42
)


from sklearn.metrics import f1_score
from sklearn.naive_bayes import GaussianNB

# Train the model
model = GaussianNB()
model.fit(x_train, y_train)

# Calculate F1_score
pred = model.predict(x_test)
f1_score(y_test, pred, average="macro")

1.0


logits = [
    5.1,  # cat
    3.2,  # car
    -1.7,  # frog
]


import numpy as np

print("Predicted class = %i (Cat)" % (np.argmax(logits)))

Predicted class = 0 (Cat)


plt.figure(figsize=(5, 3))
plt.scatter(np.arange(3), [1, 0, 0], color="red", s=50)
plt.show()


def softmax(logits):
    """Convert logits to probabilities with a numerically stable softmax.

    The maximum is subtracted before exponentiation so large logits do not
    overflow, and normalization runs along the last axis so every row of a
    2-D batch sums to 1.

    Args:
        logits: sequence or array of scores; the last axis indexes classes.

    Returns:
        Float array with the same shape as ``logits``.
    """
    logits = np.asarray(logits, dtype=float)
    if logits.size == 0:
        return logits
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)


print(softmax(logits))
print("Sum = %.2f" % np.sum(softmax(logits)))

[0.86904954 0.12998254 0.00096793]
Sum = 1.00


rand_logits = np.linspace(-1, 1, 50)
fig, ax = plt.subplots(ncols=2, figsize=(6, 3))

ax[0].plot(np.arange(50), rand_logits)
ax[0].set_title("Logits")
ax[1].plot(np.arange(50), softmax(rand_logits))
ax[1].set_title("Softmax")
plt.show()


from warnings import simplefilter

simplefilter("ignore", category=RuntimeWarning)

f = np.array([123, 456, 789])
p = np.exp(f) / np.sum(np.exp(f))
print(f, p)

[123 456 789] [ 0.  0. nan]


f = np.array([123, 456, 789])
f -= f.max()
p = np.exp(f) / np.sum(np.exp(f))
print(f, p)

[-666 -333    0] [5.75274406e-290 2.39848787e-145 1.00000000e+000]


# normal coin
# normal coin
p1 = 0.5
p2 = 0.5

# fake coin
q1 = 0.2
q2 = 0.8

p = np.array([p1, p2])
q = np.array([q1, q2])

# Kullback–Leibler divergence
div_kl = np.sum(p * np.log2(p / q))
print(f"Dkl(P||Q) = {div_kl:.3f}")

# Entropy normal coin
h_p = -np.sum(p * np.log2(p))
print(f"H(P) = {h_p:.3f}")

# Entropy fake coin
h_q = -np.sum(q * np.log2(q))
print(f"H(Q) = {h_q:.3f}")

# Cross-entropy; it equals the divergence plus the entropy of P
h_p_q = -np.sum(p * np.log2(q))
print(f"H(P||Q) = {h_p_q:.3f}")
print(f"H(P||Q) = Dkl(P||Q) + H(P) = {h_p + div_kl:.3f}")

Dkl(P||Q) = 0.322
H(P) = 1.000
H(Q) = 0.722
H(P||Q) = 1.322
H(P||Q) = Dkl(P||Q) + H(P) = 1.322


def cross_entropy_loss(pred_prob, true_prob):
    """Cross-entropy (natural log) between a true and a predicted distribution.

    Only classes with non-zero true probability contribute, so a predicted
    probability of exactly 0 for a class that cannot occur no longer turns the
    result into NaN through ``0 * log(0)``.

    Args:
        pred_prob: 1-D predicted probabilities.
        true_prob: 1-D true probabilities, same length as ``pred_prob``.

    Returns:
        The scalar loss ``-sum(true * log(pred))``.
    """
    pred_prob = np.asarray(pred_prob, dtype=float)
    true_prob = np.asarray(true_prob, dtype=float)
    support = true_prob > 0
    return -np.dot(true_prob[support], np.log(pred_prob[support]))


print(f"Logits = {logits}")

pred_prob = softmax(logits)
print(f"Predicted Probabilities = {pred_prob}")

true_prob = [1.0, 0.0, 0.0]
print(f"True Probabilities = {true_prob}")

print(f"Cross-entropy loss = {cross_entropy_loss(pred_prob, true_prob):.3f}")

Logits = [5.1, 3.2, -1.7]
Predicted Probabilities = [0.86904954 0.12998254 0.00096793]
True Probabilities = [1.0, 0.0, 0.0]
Cross-entropy loss = 0.140


# Input batch of 2 vectors with 4 elements each
x = np.array([[1, 2, 3, 4], [1, -2, 0, 0]])
# Weights
W = np.random.randn(3, 4)  # 3 classes

# Model output: one row of class scores per example
logits = x.dot(W.T)
print("Scores(Logits) \n", logits, "\n")

# Probabilities: softmax is applied to each example separately so that every
# row sums to 1.
probs = np.array([softmax(row) for row in logits])
print("Probs \n", probs, "\n")

# Ground truth classes
y = [0, 1]

# Derivative of the cross-entropy loss w.r.t. the logits is (probs - one_hot),
# i.e. subtract one from the true-class probability (not overwrite it with -1).
probs[np.arange(len(y)), y] -= 1
# Product with the inputs, arranged to have the same (3, 4) shape as W.
dW = probs.T.dot(x)

print("Grads dL/dW \n", dW)  # has the same shape as W

Scores(Logits) 
 [[-8.18596096  2.85515452  2.2492031 ]
 [ 0.68506177 -0.93389703  0.19502048]] 

Probs 
 [[9.14739601e-06 5.70680466e-01 3.11337656e-01]
 [6.51528926e-02 1.29071046e-02 3.99127338e-02]] 

Grads dL/dW 
 [[-0.93484711 -0.42931953  0.35125039]
 [-2.13030579  3.14136093  0.54284984]
 [-3.          1.7120414   0.93401297]
 [-4.          2.28272186  1.24535062]]


import random
import numpy as np


class LinearClassifier:
    """Linear multi-class classifier trained with a hinge (margin 1) loss.

    The bias is stored as the last row of ``W``: inputs get a trailing 1
    appended before they are multiplied by ``W``.
    """

    def __init__(self, labels, batch_size, imgsize=28, nchannels=1, random_state=42):
        """Create the classifier with small random weights.

        Args:
            labels: class names; their count sets the number of outputs.
            batch_size: number of examples per gradient step.
            imgsize: side length of the square input images.
            nchannels: number of image channels.
            random_state: seed passed to ``np.random.seed`` (this reseeds
                NumPy's global generator).
        """
        self.labels = labels  # classes names
        self.classes_num = len(labels)  # num of classes

        np.random.seed(random_state)
        # One weight per flattened pixel plus one extra row for the bias.
        self.W = (
            np.random.randn(nchannels * imgsize**2 + 1, self.classes_num) * 0.0001
        )
        self.batch_size = batch_size

    def fit(self, x_train, y_train, learning_rate=1e-8):
        """Run one epoch of mini-batch gradient descent.

        Args:
            x_train: array of shape (n_examples, n_pixels).
            y_train: class index per example.
            learning_rate: step size of each update.

        Returns:
            Mean loss per example over the epoch, using the weights each batch
            saw before its update.
        """
        train_len = x_train.shape[0]
        indexes = list(range(train_len))
        random.shuffle(indexes)

        total_loss = 0.0
        for start in range(0, train_len, self.batch_size):
            idx = indexes[start : start + self.batch_size]
            # Append the constant bias input.
            x_batch = np.hstack([x_train[idx], np.ones((len(idx), 1))])
            y_batch = y_train[idx]

            loss_val, grad = self.loss(x_batch, y_batch)
            self.W -= learning_rate * grad

            # loss_val is a per-example mean, so weight it by the batch size;
            # summing the means and dividing by train_len would make the
            # reported loss depend on the batch size.
            total_loss += loss_val * len(idx)
        return total_loss / train_len

    def loss(self, x, y):
        """Compute the multi-class hinge loss and its gradient for one batch.

        For every example and every wrong class ``j`` the margin is
        ``score_j - score_true + 1``; positive margins are summed.

        Args:
            x: inputs with the bias column appended, shape (batch, n_pixels + 1).
            y: class index per example (cast to int).

        Returns:
            Tuple ``(loss, dW)``: the mean loss per example and its gradient
            with the same shape as ``W``.
        """
        current_batch_size = x.shape[0]
        rows = np.arange(current_batch_size)
        targets = np.asarray(y).astype(int)

        scores = x.dot(self.W)  # (batch, classes)
        correct_class_score = scores[rows, targets][:, np.newaxis]
        margins = scores - correct_class_score + 1
        margins[rows, targets] = 0  # the true class never contributes
        violated = margins > 0

        loss = margins[violated].sum() / current_batch_size

        # Each violated class pulls its column towards x; the true class is
        # pushed away once per violation.
        coeff = violated.astype(float)
        coeff[rows, targets] = -violated.sum(axis=1)
        dW = x.T.dot(coeff) / current_batch_size
        return loss, dW

    def forward(self, x):
        """Return the predicted class index for a single flattened image."""
        x = np.append(x, 1)  # add 1 (bias)
        scores = x.dot(self.W)
        return np.argmax(scores)


def validate(model, x_test, y_test, noprint=False):
    """Compute the accuracy of ``model`` on a test set.

    Args:
        model: object whose ``forward(sample)`` returns a predicted class.
        x_test: iterable of samples.
        y_test: true class per sample.
        noprint: when false, the running accuracy is printed after every
            1000 processed samples.

    Returns:
        Fraction of correctly classified samples, ``correct / len(y_test)``.
    """
    correct = 0
    for seen, (img, target) in enumerate(zip(x_test, y_test), start=1):
        if model.forward(img) == target:
            correct += 1
        # `seen` counts the samples processed so far, so the running accuracy
        # uses the right denominator (the old code divided by the 0-based index).
        if not noprint and seen % 1000 == 0:
            print("Accuracy {:.3f}".format(correct / seen))
    return correct / len(y_test)


def train_and_validate(
    model_class,
    x_train,
    y_train,
    x_test,
    y_test,
    labels,
    imgsize,
    nchannels,
    learning_rates=(1e-2, 1e-8),
    batch_sizes=(256, 2048),
    num_epochs=10,
):
    """Train a fresh model for every (learning rate, batch size) pair.

    After each epoch the model is scored with ``validate`` on the test data
    and the loss and accuracy are printed; the best epoch is reported per
    configuration.

    Args:
        model_class: callable ``model_class(labels, batch_size, imgsize,
            nchannels)`` returning an object with ``fit`` and ``forward``.
        x_train, y_train: training inputs and class indices.
        x_test, y_test: evaluation inputs and class indices.
        labels: class names passed to the model.
        imgsize: image side length passed to the model.
        nchannels: number of image channels passed to the model.
        learning_rates: learning rates to try.
        batch_sizes: batch sizes to try.
        num_epochs: epochs per configuration.
    """
    print("How learning quality depends of speed:")

    for lr in learning_rates:
        for bs in batch_sizes:
            print("-" * 50, "\n", "learning_rate =", lr, "\tbatch_size =", bs)
            print()
            model = model_class(labels, bs, imgsize, nchannels)

            # Reset both per configuration so the summary can never hit an
            # unbound name or report the epoch of a previous configuration.
            best_accuracy, best_epoch = 0, 0
            for epoch in range(num_epochs):
                loss = model.fit(x_train, y_train, learning_rate=lr)
                accuracy = validate(model, x_test, y_test, noprint=True)
                if best_accuracy < accuracy:
                    best_accuracy = accuracy
                    best_epoch = epoch
                print(f"Epoch {epoch} \tLoss: {loss}, \tAccuracy:{accuracy}")

            print()
            print(f"Best accuracy is {best_accuracy} in {best_epoch} epoch")


from torchvision.datasets import MNIST
from IPython.display import clear_output

# Download MNIST into ./content (if needed) and load both splits.
dataset_train = MNIST("content", train=True, download=True)
dataset_test = MNIST("content", train=False, download=True)

# Flatten every 28x28 image into a 784-element row.
x_train = dataset_train.data.numpy().reshape((-1, 28 * 28))
y_train = np.array(dataset_train.targets)

x_test = dataset_test.data.numpy().reshape((-1, 28 * 28))
y_test = np.array(dataset_test.targets)

clear_output()

print(f"x_train shape: {x_train.shape}, y_train shape: {y_train.shape}")
# This line reports the test split; it used to be labelled "x_train".
print(f"x_test shape: {x_test.shape}, y_test shape: {y_test.shape}")

x_train shape: (60000, 784), y_train shape: (60000,)
x_train shape : (10000, 784), y_test shape: (10000,)


train_and_validate(
    LinearClassifier, x_train, y_train, x_test, y_test, np.arange(10), 28, 1
)

How learning quality depends of speed:
-------------------------------------------------- 
 learning_rate = 0.01 	batch_size = 256

Epoch 0 	Loss: 2.5098721986580346, 	Accuracy:0.8705
Epoch 1 	Loss: 1.4153980988992618, 	Accuracy:0.8153
Epoch 2 	Loss: 2.0772829548134704, 	Accuracy:0.8705
Epoch 3 	Loss: 1.3425311413875551, 	Accuracy:0.8924
Epoch 4 	Loss: 1.4324064153091902, 	Accuracy:0.8912
Epoch 5 	Loss: 1.8396557432352694, 	Accuracy:0.8932
Epoch 6 	Loss: 1.2290740544948997, 	Accuracy:0.8981
Epoch 7 	Loss: 1.3589895608044245, 	Accuracy:0.9021
Epoch 8 	Loss: 1.2833188445719912, 	Accuracy:0.9071
Epoch 9 	Loss: 1.2164956214043465, 	Accuracy:0.8953

Best accuracy is 0.9071 in 8 epoch
-------------------------------------------------- 
 learning_rate = 0.01 	batch_size = 2048

Epoch 0 	Loss: 0.5633132399535251, 	Accuracy:0.88
Epoch 1 	Loss: 0.14677605851411546, 	Accuracy:0.8895
Epoch 2 	Loss: 0.10339022744489589, 	Accuracy:0.8677
Epoch 3 	Loss: 0.6152971256528247, 	Accuracy:0.8917
Epoch 4 	Loss: 0.11297049350142452, 	Accuracy:0.8975
Epoch 5 	Loss: 0.08778798733577138, 	Accuracy:0.8837
Epoch 6 	Loss: 0.07745942321071399, 	Accuracy:0.9047
Epoch 7 	Loss: 0.6470424800086411, 	Accuracy:0.7813
Epoch 8 	Loss: 0.18175935300172769, 	Accuracy:0.899
Epoch 9 	Loss: 0.09568461976775788, 	Accuracy:0.9022

Best accuracy is 0.9047 in 6 epoch
-------------------------------------------------- 
 learning_rate = 1e-08 	batch_size = 256

Epoch 0 	Loss: 0.017419085056317746, 	Accuracy:0.7838
Epoch 1 	Loss: 0.007079441801842759, 	Accuracy:0.8287
Epoch 2 	Loss: 0.005478765637441953, 	Accuracy:0.8463
Epoch 3 	Loss: 0.0047464021421586185, 	Accuracy:0.8569
Epoch 4 	Loss: 0.0043017207164322765, 	Accuracy:0.8637
Epoch 5 	Loss: 0.003990656107009424, 	Accuracy:0.871
Epoch 6 	Loss: 0.0037675227267245474, 	Accuracy:0.8752
Epoch 7 	Loss: 0.00359140186643874, 	Accuracy:0.8779
Epoch 8 	Loss: 0.00345261153286036, 	Accuracy:0.8799
Epoch 9 	Loss: 0.0033321525472751194, 	Accuracy:0.8809

Best accuracy is 0.8809 in 9 epoch
-------------------------------------------------- 
 learning_rate = 1e-08 	batch_size = 2048

Epoch 0 	Loss: 0.004283987276005508, 	Accuracy:0.2468
Epoch 1 	Loss: 0.0033388230062430723, 	Accuracy:0.4564
Epoch 2 	Loss: 0.002547433767736788, 	Accuracy:0.6118
Epoch 3 	Loss: 0.00198155031694344, 	Accuracy:0.6904
Epoch 4 	Loss: 0.0016326702103084264, 	Accuracy:0.7321
Epoch 5 	Loss: 0.0014073636015865705, 	Accuracy:0.7568
Epoch 6 	Loss: 0.0012515353890091999, 	Accuracy:0.7725
Epoch 7 	Loss: 0.0011413027342624953, 	Accuracy:0.7855
Epoch 8 	Loss: 0.0010573937618037724, 	Accuracy:0.7959
Epoch 9 	Loss: 0.0009932982778611983, 	Accuracy:0.8043

Best accuracy is 0.8043 in 9 epoch


import os

file_exists = os.path.exists("/content/cifar-10-batches-py")
if file_exists == False:
    #!wget -q https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
    !wget -q https://edunet.kea.su/repo/EduNet-web_dependencies/datasets/cifar-10-python.tar.gz
    !tar -xzf cifar-10-python.tar.gz
clear_output()


import pickle


def unpickle(file):
    """Load and return the object stored in the pickle file at path `file`.

    The file is opened in binary mode and decoded with ``encoding="bytes"``,
    so 8-bit strings pickled by Python 2 come back as ``bytes`` objects
    (which is why callers index the result with keys such as ``b"data"``).

    NOTE: ``pickle.load`` can execute arbitrary code while deserializing;
    only call this on files obtained from a trusted source.
    """
    with open(file, "rb") as fo:
        # Return directly instead of binding a local named `dict`, which
        # shadowed the builtin.
        return pickle.load(fo, encoding="bytes")


# Read the five CIFAR-10 training batches and stack them in a single pass.
# (With an absolute Colab layout the same files would live under
# /content/cifar-10-batches-py/.)
train_batches = [
    unpickle(f"cifar-10-batches-py/data_batch_{batch_no}")
    for batch_no in range(1, 6)
]

# The empty float seeds are part of the concatenation on purpose: they give
# the stacked arrays the same dtype promotion as appending each batch to an
# initially empty np.zeros / np.array([]) accumulator.
x_train = np.concatenate(
    [np.zeros((0, 3072))] + [np.array(batch[b"data"]) for batch in train_batches],
    axis=0,
)
y_train = np.concatenate(
    [np.array([])] + [np.array(batch[b"labels"]) for batch in train_batches],
    axis=0,
)

# The held-out batch is converted directly, without the float seed.
# NOTE(review): this means x_test / y_test keep whatever dtype the pickled
# data yields, while x_train / y_train are promoted by the float seeds;
# confirm the downstream code is fine with that difference.
test = unpickle("cifar-10-batches-py/test_batch")
x_test, y_test = np.array(test[b"data"]), np.array(test[b"labels"])

# Human-readable class names, listed in label-index order.
labels_eng = [
    "Airplane", "Car", "Bird", "Cat", "Deer",
    "Dog", "Frog", "Horse", "Ship", "Truck",
]

print(f"x_train shape: {x_train.shape}, y_train shape: {y_train.shape}")
print(f"x_test shape: {x_test.shape}, y_test shape: {y_test.shape}")

x_train shape: (50000, 3072), y_train shape: (50000,)
x_test shape: (10000, 3072), y_test shape: (10000,)


# Run the training/validation routine with LinearClassifier on the CIFAR-10
# arrays loaded above, passing the English class names.
# NOTE(review): train_and_validate is defined outside this section, so the
# meaning of the trailing positional arguments 32 and 3 cannot be confirmed
# here; presumably image side length and channel count (32 * 32 * 3 = 3072
# features per row). Verify against the function signature.
train_and_validate(
    LinearClassifier, x_train, y_train, x_test, y_test, labels_eng, 32, 3
)

How learning quality depends of speed:
-------------------------------------------------- 
 learning_rate = 0.01 	batch_size = 256

Epoch 0 	Loss: 1772.198787043581, 	Accuracy:0.2372
Epoch 1 	Loss: 1387.99798025283, 	Accuracy:0.2435
Epoch 2 	Loss: 1307.6473435167914, 	Accuracy:0.2507
Epoch 3 	Loss: 1256.5014042195996, 	Accuracy:0.209
Epoch 4 	Loss: 1216.307964201034, 	Accuracy:0.2345
Epoch 5 	Loss: 1214.527957299643, 	Accuracy:0.2236
Epoch 6 	Loss: 1167.7506872814574, 	Accuracy:0.217
Epoch 7 	Loss: 1165.1991212929063, 	Accuracy:0.1825
Epoch 8 	Loss: 1176.5519427643487, 	Accuracy:0.12
Epoch 9 	Loss: 1131.8868549203364, 	Accuracy:0.1926

Best accuracy is 0.2507 in 2 epoch
-------------------------------------------------- 
 learning_rate = 0.01 	batch_size = 2048

Epoch 0 	Loss: 294.23438029978627, 	Accuracy:0.1747
Epoch 1 	Loss: 257.79749766627356, 	Accuracy:0.1831
Epoch 2 	Loss: 235.32624918508512, 	Accuracy:0.2033
Epoch 3 	Loss: 213.0926262140702, 	Accuracy:0.2445
Epoch 4 	Loss: 204.1270271081117, 	Accuracy:0.2216
Epoch 5 	Loss: 199.04056755822256, 	Accuracy:0.2534
Epoch 6 	Loss: 185.07544936385617, 	Accuracy:0.2031
Epoch 7 	Loss: 186.58714921725817, 	Accuracy:0.245
Epoch 8 	Loss: 181.58265884084247, 	Accuracy:0.2336
Epoch 9 	Loss: 180.98593982297973, 	Accuracy:0.2423

Best accuracy is 0.2534 in 5 epoch
-------------------------------------------------- 
 learning_rate = 1e-08 	batch_size = 256

Epoch 0 	Loss: 0.02716366986995123, 	Accuracy:0.2585
Epoch 1 	Loss: 0.022451895979758132, 	Accuracy:0.3058
Epoch 2 	Loss: 0.021150416523891286, 	Accuracy:0.3227
Epoch 3 	Loss: 0.020427497686612686, 	Accuracy:0.3111
Epoch 4 	Loss: 0.019968928950270352, 	Accuracy:0.3385
Epoch 5 	Loss: 0.01962436014751914, 	Accuracy:0.3348
Epoch 6 	Loss: 0.019374622857821867, 	Accuracy:0.3392
Epoch 7 	Loss: 0.019174024942161746, 	Accuracy:0.3421
Epoch 8 	Loss: 0.018976220746712173, 	Accuracy:0.3431
Epoch 9 	Loss: 0.01885014813505244, 	Accuracy:0.3455

Best accuracy is 0.3455 in 9 epoch
-------------------------------------------------- 
 learning_rate = 1e-08 	batch_size = 2048

Epoch 0 	Loss: 0.0043224021744806785, 	Accuracy:0.1401
Epoch 1 	Loss: 0.003833233508265536, 	Accuracy:0.1734
Epoch 2 	Loss: 0.0035402594134510326, 	Accuracy:0.2084
Epoch 3 	Loss: 0.003349777799436194, 	Accuracy:0.2294
Epoch 4 	Loss: 0.0032198315520998943, 	Accuracy:0.2473
Epoch 5 	Loss: 0.003126003022238365, 	Accuracy:0.2567
Epoch 6 	Loss: 0.003057368343669502, 	Accuracy:0.2585
Epoch 7 	Loss: 0.003004541789346554, 	Accuracy:0.267
Epoch 8 	Loss: 0.0029478368956806856, 	Accuracy:0.2828
Epoch 9 	Loss: 0.0029102436683368467, 	Accuracy:0.283

Best accuracy is 0.283 in 9 epoch

	0	1	2	3	4	5	6	7	8	9	...	20	21	22	23	24	25	26	27	28	29
count	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	...	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000	569.000000
mean	14.127292	19.289649	91.969033	654.889104	0.096360	0.104341	0.088799	0.048919	0.181162	0.062798	...	16.269190	25.677223	107.261213	880.583128	0.132369	0.254265	0.272188	0.114606	0.290076	0.083946
std	3.524049	4.301036	24.298981	351.914129	0.014064	0.052813	0.079720	0.038803	0.027414	0.007060	...	4.833242	6.146258	33.602542	569.356993	0.022832	0.157336	0.208624	0.065732	0.061867	0.018061
min	6.981000	9.710000	43.790000	143.500000	0.052630	0.019380	0.000000	0.000000	0.106000	0.049960	...	7.930000	12.020000	50.410000	185.200000	0.071170	0.027290	0.000000	0.000000	0.156500	0.055040
25%	11.700000	16.170000	75.170000	420.300000	0.086370	0.064920	0.029560	0.020310	0.161900	0.057700	...	13.010000	21.080000	84.110000	515.300000	0.116600	0.147200	0.114500	0.064930	0.250400	0.071460
50%	13.370000	18.840000	86.240000	551.100000	0.095870	0.092630	0.061540	0.033500	0.179200	0.061540	...	14.970000	25.410000	97.660000	686.500000	0.131300	0.211900	0.226700	0.099930	0.282200	0.080040
75%	15.780000	21.800000	104.100000	782.700000	0.105300	0.130400	0.130700	0.074000	0.195700	0.066120	...	18.790000	29.720000	125.400000	1084.000000	0.146000	0.339100	0.382900	0.161400	0.317900	0.092080
max	28.110000	39.280000	188.500000	2501.000000	0.163400	0.345400	0.426800	0.201200	0.304000	0.097440	...	36.040000	49.540000	251.200000	4254.000000	0.222600	1.058000	1.252000	0.291000	0.663800	0.207500

	0	1	2	3	4	5	6	7	8	9	...	20	21	22	23	24	25	26	27	28	29
count	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	...	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02	5.690000e+02
mean	-3.153111e-15	-6.568462e-15	-6.993039e-16	-8.553985e-16	6.081447e-15	-1.136369e-15	-2.997017e-16	1.023981e-15	-1.860648e-15	-1.504752e-15	...	-2.297713e-15	1.742016e-15	-1.198807e-15	6.118909e-16	-5.094929e-15	-2.122887e-15	6.118909e-16	-1.998011e-16	-2.422589e-15	2.497514e-15
std	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	...	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00	1.000880e+00
min	-2.029648e+00	-2.229249e+00	-1.984504e+00	-1.454443e+00	-3.112085e+00	-1.610136e+00	-1.114873e+00	-1.261820e+00	-2.744117e+00	-1.819865e+00	...	-1.726901e+00	-2.223994e+00	-1.693361e+00	-1.222423e+00	-2.682695e+00	-1.443878e+00	-1.305831e+00	-1.745063e+00	-2.160960e+00	-1.601839e+00
25%	-6.893853e-01	-7.259631e-01	-6.919555e-01	-6.671955e-01	-7.109628e-01	-7.470860e-01	-7.437479e-01	-7.379438e-01	-7.032397e-01	-7.226392e-01	...	-6.749213e-01	-7.486293e-01	-6.895783e-01	-6.421359e-01	-6.912304e-01	-6.810833e-01	-7.565142e-01	-7.563999e-01	-6.418637e-01	-6.919118e-01
50%	-2.150816e-01	-1.046362e-01	-2.359800e-01	-2.951869e-01	-3.489108e-02	-2.219405e-01	-3.422399e-01	-3.977212e-01	-7.162650e-02	-1.782793e-01	...	-2.690395e-01	-4.351564e-02	-2.859802e-01	-3.411812e-01	-4.684277e-02	-2.695009e-01	-2.182321e-01	-2.234689e-01	-1.274095e-01	-2.164441e-01
75%	4.693926e-01	5.841756e-01	4.996769e-01	3.635073e-01	6.361990e-01	4.938569e-01	5.260619e-01	6.469351e-01	5.307792e-01	4.709834e-01	...	5.220158e-01	6.583411e-01	5.402790e-01	3.575891e-01	5.975448e-01	5.396688e-01	5.311411e-01	7.125100e-01	4.501382e-01	4.507624e-01
max	3.971288e+00	4.651889e+00	3.976130e+00	5.250529e+00	4.770911e+00	4.568425e+00	4.243589e+00	3.927930e+00	4.484751e+00	4.910919e+00	...	4.094189e+00	3.885905e+00	4.287337e+00	5.930172e+00	3.955374e+00	5.112877e+00	4.700669e+00	2.685877e+00	6.046041e+00	6.846856e+00

	alcohol	malic_acid	ash	alcalinity_of_ash	magnesium	total_phenols	flavanoids	nonflavanoid_phenols	proanthocyanins	color_intensity	hue	od280/od315_of_diluted_wines	proline
0	14.23	1.71	2.43	15.6	127.0	2.80	3.06	0.28	2.29	5.64	1.04	3.92	1065.0
1	13.20	1.78	2.14	11.2	100.0	2.65	2.76	0.26	1.28	4.38	1.05	3.40	1050.0
2	13.16	2.36	2.67	18.6	101.0	2.80	3.24	0.30	2.81	5.68	1.03	3.17	1185.0
3	14.37	1.95	2.50	16.8	113.0	3.85	3.49	0.24	2.18	7.80	0.86	3.45	1480.0
4	13.24	2.59	2.87	21.0	118.0	2.80	2.69	0.39	1.82	4.32	1.04	2.93	735.0

	Hours	Scores
0	2.5	21
1	5.1	47
2	3.2	27
3	8.5	75
4	3.5	30

Линейные модели. Задача регрессии¶

Линейная регрессия¶

Своя реализация линейной регрессии¶

Модель линейной регрессии из библиотеки scikit-learn¶

Случай многих переменных и практические особенности работы с линейными моделями¶

Нормализация данных¶

Проблема переобучения в случае линейных моделей¶

1D классификация¶

2D классификация¶

3D классификация¶

Геометрическая интерпретация¶

Единообразный подход к учету смещения¶

Линейный классификатор с пороговой функцией принятия решения. Hard Margin Classifier¶

Линейный классификатор с Hinge loss. Soft Margin Classifier¶

Метод градиентного спуска¶

Численный расчет производной¶

Аналитический расчет производной от функции потерь¶

Простые производные¶

Chain-rule¶

Часть MSE-loss¶

MSE-loss¶

Часть MAE-Loss¶

Max-Loss¶

Hinge loss¶

Выбор шага обучения¶

Алгоритм стохастического градиентного спуска. Выбор размера батча¶

Регуляризация¶

Обобщенные линейные модели¶

Kernel SVM¶

Вероятностный подход в задаче классификации¶

Наивный Байесовский классификатор¶

Кросс-энтропия как общая функция потерь для задач классификации¶

Переход к вероятностям¶

Практическое вычисление SoftMax¶

Расстояние (дивергенция) Кульбака — Лейблера¶

Переход к оценке модели¶

Энтропия¶

Расчет функции потерь¶

Кросс-энтропия vs Hinge loss¶

Градиент функции потерь. Кросс-энтропия¶

Пример обучения линейного классификатора с hinge loss¶

MNIST¶

CIFAR-10¶