mirror of https://github.com/da0c/DL_Course_SamU
add assignment 2
parent 3ec44aecf0
commit e24d001813
File diff suppressed because one or more lines are too long
@@ -0,0 +1,2 @@
from scripts.classifiers.k_nearest_neighbor import *
from scripts.classifiers.linear_classifier import *
@@ -0,0 +1,183 @@
from builtins import range
from builtins import object
import numpy as np
from past.builtins import xrange


class KNearestNeighbor(object):
    """ a kNN classifier with L2 distance """

    def __init__(self):
        pass

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Inputs:
        - X: A numpy array of shape (num_train, D) containing the training data
          consisting of num_train samples each of dimension D.
        - y: A numpy array of shape (num_train,) containing the training labels,
          where y[i] is the label for X[i].
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """
        Predict labels for test data using this classifier.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data consisting
          of num_test samples each of dimension D.
        - k: The number of nearest neighbors that vote for the predicted labels.
        - num_loops: Determines which implementation to use to compute distances
          between training points and testing points.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError('Invalid value %d for num_loops' % num_loops)

        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a nested loop over both the training data and the
        test data.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data.

        Returns:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          is the Euclidean distance between the ith test point and the jth training
          point.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                #############################################################
                # TODO:
                # Compute the l2 distance between the ith test point and the jth
                # training point, and store the result in dists[i, j]. You should
                # not use a loop over dimension, nor use np.linalg.norm().
                #############################################################
                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

                pass

                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists

    def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            #################################################################
            # TODO:
            # Compute the l2 distance between the ith test point and all training
            # points, and store the result in dists[i, :].
            # Do not use np.linalg.norm().
            #################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        #####################################################################
        # TODO:
        # Compute the l2 distance between all test points and all training
        # points without using any explicit loops, and store the result in
        # dists.
        #
        # You should implement this function using only basic array operations;
        # in particular you should not use functions from scipy,
        # nor use np.linalg.norm().
        #
        # HINT: Try to formulate the l2 distance using matrix multiplication
        # and two broadcast sums.
        #####################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Inputs:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          gives the distance between the ith test point and the jth training point.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # A list of length k storing the labels of the k nearest neighbors to
            # the ith test point.
            closest_y = []
            #################################################################
            # TODO:
            # Use the distance matrix to find the k nearest neighbors of the ith
            # testing point, and use self.y_train to find the labels of these
            # neighbors. Store these labels in closest_y.
            # Hint: Look up the function numpy.argsort.
            #################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            #################################################################
            # TODO:
            # Now that you have found the labels of the k nearest neighbors, you
            # need to find the most common label in the list closest_y of labels.
            # Store this label in y_pred[i]. Break ties by choosing the smaller
            # label.
            #################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        return y_pred
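For reference, the two hints above (matrix multiplication plus two broadcast sums for the no-loop distances, and numpy.argsort for the vote) can be tried outside the skeleton. This is a minimal sketch of the technique, not the reference solution; the helper names below are illustrative and assume integer class labels.

import numpy as np

def pairwise_l2_no_loops(X, X_train):
    # ||x - t||^2 = ||x||^2 - 2 x.t + ||t||^2 for all pairs at once:
    # one matrix multiplication plus two broadcast sums.
    test_sq = np.sum(X ** 2, axis=1, keepdims=True)    # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)            # (num_train,)
    cross = X @ X_train.T                               # (num_test, num_train)
    sq = test_sq - 2.0 * cross + train_sq               # broadcasts to (num_test, num_train)
    return np.sqrt(np.maximum(sq, 0.0))                 # clamp tiny negatives from round-off

def vote_labels(dists, y_train, k=1):
    # k nearest neighbors via np.argsort; majority vote with np.bincount,
    # where np.argmax breaks ties in favor of the smaller label.
    nearest = np.argsort(dists, axis=1)[:, :k]
    return np.array([np.argmax(np.bincount(y_train[row])) for row in nearest])

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    X_train, y_train = rng.standard_normal((20, 5)), rng.integers(0, 3, 20)
    X_test = rng.standard_normal((4, 5))
    print(vote_labels(pairwise_l2_no_loops(X_test, X_train), y_train, k=3))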
@@ -0,0 +1,139 @@
from __future__ import print_function

from builtins import range
from builtins import object
import numpy as np
from scripts.classifiers.linear_svm import *
from scripts.classifiers.softmax import *
from past.builtins import xrange


class LinearClassifier(object):

    def __init__(self):
        self.W = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=False):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels; y[i] = c
          means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        A list containing the value of the loss function at each training iteration.
        """
        num_train, dim = X.shape
        num_classes = np.max(y) + 1  # assume y takes values 0...K-1 where K is number of classes
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # Run stochastic gradient descent to optimize W
        loss_history = []
        for it in range(num_iters):
            X_batch = None
            y_batch = None

            #################################################################
            # TODO:
            # Sample batch_size elements from the training data and their
            # corresponding labels to use in this round of gradient descent.
            # Store the data in X_batch and their corresponding labels in
            # y_batch; after sampling X_batch should have shape (batch_size, dim)
            # and y_batch should have shape (batch_size,)
            #
            # Hint: Use np.random.choice to generate indices. Sampling with
            # replacement is faster than sampling without replacement.
            #################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # perform parameter update
            #################################################################
            # TODO:
            # Update the weights using the gradient and the learning rate.
            #################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        """
        Use the trained weights of this linear classifier to predict labels for
        data points.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the predicted
          class.
        """
        y_pred = np.zeros(X.shape[0])
        ###################################################################
        # TODO:
        # Implement this method. Store the predicted labels in y_pred.
        ###################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return y_pred

    def loss(self, X_batch, y_batch, reg):
        """
        Compute the loss function and its derivative.
        Subclasses will override this.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
        - reg: (float) regularization strength.

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W
        """
        pass


class LinearSVM(LinearClassifier):
    """ A subclass that uses the Multiclass SVM loss function """

    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)


class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch, reg):
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
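The two TODOs inside train() name standard techniques: minibatch sampling with np.random.choice (with replacement) and a vanilla SGD weight update. The standalone helpers below are illustrative names, not part of the assignment files; a sketch, not the reference solution.

import numpy as np

def sample_minibatch(X, y, batch_size):
    # Sampling with replacement via np.random.choice, as the hint suggests.
    idx = np.random.choice(X.shape[0], batch_size, replace=True)
    return X[idx], y[idx]            # shapes (batch_size, dim) and (batch_size,)

def sgd_step(W, grad, learning_rate):
    # Vanilla stochastic gradient descent: move against the gradient.
    return W - learning_rate * grad

# e.g.: X_batch, y_batch = sample_minibatch(X, y, 200); W = sgd_step(W, grad, 1e-3)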
@@ -0,0 +1,100 @@
from builtins import range
import numpy as np
from random import shuffle
from past.builtins import xrange


def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train

    # Add regularization to the loss.
    loss += reg * np.sum(W * W)

    #########################################################################
    # TODO:
    # Compute the gradient of the loss function and store it in dW.
    # Rather than first computing the loss and then computing the derivative,
    # it may be simpler to compute the derivative at the same time that the
    # loss is being computed. As a result you may need to modify some of the
    # code above to compute the gradient.
    #########################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW


def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation.

    Inputs and outputs are the same as svm_loss_naive.
    """
    loss = 0.0
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    #########################################################################
    # TODO:
    # Implement a vectorized version of the structured SVM loss, storing the
    # result in loss.
    #########################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    #########################################################################
    # TODO:
    # Implement a vectorized version of the gradient for the structured SVM
    # loss, storing the result in dW.
    #
    # Hint: Instead of computing the gradient from scratch, it may be easier
    # to reuse some of the intermediate values that you used to compute the
    # loss.
    #########################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
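One way to vectorize the double loop of svm_loss_naive is to compute all margins at once, zero out the correct-class column, and reuse the resulting mask for the gradient. The function name below is illustrative; this is a sketch of the technique under the same loss definition (delta = 1, reg * sum(W*W) regularization), not the reference solution.

import numpy as np

def svm_loss_vectorized_sketch(W, X, y, reg):
    num_train = X.shape[0]
    scores = X.dot(W)                                     # (N, C)
    correct = scores[np.arange(num_train), y][:, None]    # (N, 1)
    margins = np.maximum(0, scores - correct + 1)         # delta = 1, as above
    margins[np.arange(num_train), y] = 0
    loss = margins.sum() / num_train + reg * np.sum(W * W)

    mask = (margins > 0).astype(float)                    # 1 where a margin contributes
    mask[np.arange(num_train), y] = -mask.sum(axis=1)     # each violation pushes the correct class
    dW = X.T.dot(mask) / num_train + 2 * reg * W
    return loss, dW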
@@ -0,0 +1,225 @@
from __future__ import print_function

from builtins import range
from builtins import object
import numpy as np
import matplotlib.pyplot as plt
from past.builtins import xrange


class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network. The net has an input dimension of
    N, a hidden layer dimension of H, and performs classification over C classes.
    We train the network with a softmax loss function and L2 regularization on the
    weight matrices. The network uses a ReLU nonlinearity after the first fully
    connected layer.

    In other words, the network has the following architecture:

    input - fully connected layer - ReLU - fully connected layer - softmax

    The outputs of the second fully-connected layer are the scores for each class.
    """

    def __init__(self, input_size, hidden_size, output_size, std=1e-4):
        """
        Initialize the model. Weights are initialized to small random values and
        biases are initialized to zero. Weights and biases are stored in the
        variable self.params, which is a dictionary with the following keys:

        W1: First layer weights; has shape (D, H)
        b1: First layer biases; has shape (H,)
        W2: Second layer weights; has shape (H, C)
        b2: Second layer biases; has shape (C,)

        Inputs:
        - input_size: The dimension D of the input data.
        - hidden_size: The number of neurons H in the hidden layer.
        - output_size: The number of classes C.
        """
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def loss(self, X, y=None, reg=0.0):
        """
        Compute the loss and gradients for a two layer fully connected neural
        network.

        Inputs:
        - X: Input data of shape (N, D). Each X[i] is a training sample.
        - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
          an integer in the range 0 <= y[i] < C. This parameter is optional; if it
          is not passed then we only return scores, and if it is passed then we
          instead return the loss and gradients.
        - reg: Regularization strength.

        Returns:
        If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
        the score for class c on input X[i].

        If y is not None, instead return a tuple of:
        - loss: Loss (data loss and regularization loss) for this batch of training
          samples.
        - grads: Dictionary mapping parameter names to gradients of those parameters
          with respect to the loss function; has the same keys as self.params.
        """
        # Unpack variables from the params dictionary
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape

        # Compute the forward pass
        scores = None
        #####################################################################
        # TODO: Perform the forward pass, computing the class scores for the
        # input. Store the result in the scores variable, which should be an
        # array of shape (N, C).
        #####################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # If the targets are not given then jump out, we're done
        if y is None:
            return scores

        # Compute the loss
        loss = None
        #####################################################################
        # TODO: Finish the forward pass, and compute the loss. This should
        # include both the data loss and L2 regularization for W1 and W2.
        # Store the result in the variable loss, which should be a scalar.
        # Use the Softmax classifier loss.
        #####################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Backward pass: compute gradients
        grads = {}
        #####################################################################
        # TODO: Compute the backward pass, computing the derivatives of the
        # weights and biases. Store the results in the grads dictionary. For
        # example, grads['W1'] should store the gradient on W1, and be a
        # matrix of same size
        #####################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        return loss, grads

    def train(self, X, y, X_val, y_val,
              learning_rate=1e-3, learning_rate_decay=0.95,
              reg=5e-6, num_iters=100,
              batch_size=200, verbose=False):
        """
        Train this neural network using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) giving training data.
        - y: A numpy array of shape (N,) giving training labels; y[i] = c means that
          X[i] has label c, where 0 <= c < C.
        - X_val: A numpy array of shape (N_val, D) giving validation data.
        - y_val: A numpy array of shape (N_val,) giving validation labels.
        - learning_rate: Scalar giving learning rate for optimization.
        - learning_rate_decay: Scalar giving factor used to decay the learning rate
          after each epoch.
        - reg: Scalar giving regularization strength.
        - num_iters: Number of steps to take when optimizing.
        - batch_size: Number of training examples to use per step.
        - verbose: boolean; if true print progress during optimization.
        """
        num_train = X.shape[0]
        iterations_per_epoch = max(num_train / batch_size, 1)

        # Use SGD to optimize the parameters in self.model
        loss_history = []
        train_acc_history = []
        val_acc_history = []

        for it in range(num_iters):
            X_batch = None
            y_batch = None

            #################################################################
            # TODO: Create a random minibatch of training data and labels,
            # storing them in X_batch and y_batch respectively.
            #################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # Compute loss and gradients using the current minibatch
            loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
            loss_history.append(loss)

            #################################################################
            # TODO: Use the gradients in the grads dictionary to update the
            # parameters of the network (stored in the dictionary self.params)
            # using stochastic gradient descent. You'll need to use the
            # gradients stored in the grads dictionary defined above.
            #################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

            # Every epoch, check train and val accuracy and decay learning rate.
            if it % iterations_per_epoch == 0:
                # Check accuracy
                train_acc = (self.predict(X_batch) == y_batch).mean()
                val_acc = (self.predict(X_val) == y_val).mean()
                train_acc_history.append(train_acc)
                val_acc_history.append(val_acc)

                # Decay learning rate
                learning_rate *= learning_rate_decay

        return {
            'loss_history': loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
        }

    def predict(self, X):
        """
        Use the trained weights of this two-layer network to predict labels for
        data points. For each data point we predict scores for each of the C
        classes, and assign each data point to the class with the highest score.

        Inputs:
        - X: A numpy array of shape (N, D) giving N D-dimensional data points to
          classify.

        Returns:
        - y_pred: A numpy array of shape (N,) giving predicted labels for each of
          the elements of X. For all i, y_pred[i] = c means that X[i] is predicted
          to have class c, where 0 <= c < C.
        """
        y_pred = None

        ###################################################################
        # TODO: Implement this function; it should be VERY simple!
        ###################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        return y_pred
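The class docstring above fixes the architecture: affine layer, ReLU, affine layer, softmax scores. A minimal standalone sketch of that forward pass, with an illustrative function name and the parameter shapes documented in __init__, looks like this (it is not the graded implementation):

import numpy as np

def two_layer_scores(X, W1, b1, W2, b2):
    # affine -> ReLU -> affine; rows of the result are unnormalized class scores
    hidden = np.maximum(0, X.dot(W1) + b1)   # (N, H)
    return hidden.dot(W2) + b2               # (N, C)

# e.g. with D=4, H=10, C=3:
# two_layer_scores(np.ones((2, 4)), np.zeros((4, 10)), np.zeros(10),
#                  np.zeros((10, 3)), np.zeros(3)).shape  # -> (2, 3)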
@@ -0,0 +1,65 @@
from builtins import range
import numpy as np
from random import shuffle
from past.builtins import xrange


def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops)

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #########################################################################
    # TODO: Compute the softmax loss and its gradient using explicit loops.
    # Store the loss in loss and the gradient in dW. If you are not careful
    # here, it is easy to run into numeric instability. Don't forget the
    # regularization!
    #########################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW


def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #########################################################################
    # TODO: Compute the softmax loss and its gradient using no explicit loops.
    # Store the loss in loss and the gradient in dW. If you are not careful
    # here, it is easy to run into numeric instability. Don't forget the
    # regularization!
    #########################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
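The TODOs above warn about numeric instability: exponentiating raw scores overflows easily, and the standard fix is to shift each row of scores by its maximum before calling np.exp. A vectorized sketch of that idea (illustrative name, not the reference solution, same interface as the functions above):

import numpy as np

def softmax_loss_sketch(W, X, y, reg):
    num_train = X.shape[0]
    scores = X.dot(W)
    scores -= scores.max(axis=1, keepdims=True)      # stability: shift by row max
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)   # (N, C)
    loss = -np.log(probs[np.arange(num_train), y]).mean() + reg * np.sum(W * W)

    dscores = probs.copy()
    dscores[np.arange(num_train), y] -= 1            # d(loss)/d(scores) per example
    dW = X.T.dot(dscores) / num_train + 2 * reg * W
    return loss, dW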
@@ -0,0 +1,262 @@
from __future__ import print_function

from builtins import range
from six.moves import cPickle as pickle
import numpy as np
import os
from imageio import imread
import platform


def load_pickle(f):
    version = platform.python_version_tuple()
    if version[0] == '2':
        return pickle.load(f)
    elif version[0] == '3':
        return pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))


def load_CIFAR_batch(filename):
    """ load single batch of cifar """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
        Y = np.array(Y)
        return X, Y


def load_CIFAR10(ROOT):
    """ load all of cifar """
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
    }


def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t') for line in f)
        for wnid, words in wnid_to_words.items():
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * \
            np.ones(num_images, dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                # grayscale file
                img.shape = (64, 64, 1)
            X_train_block[j] = img.transpose(2, 0, 1)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
        num_val = len(img_files)
        y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
        X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
        for i, img_file in enumerate(img_files):
            img_file = os.path.join(path, 'val', 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                img.shape = (64, 64, 1)
            X_val[i] = img.transpose(2, 0, 1)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        img = imread(img_file)
        if img.ndim == 2:
            img.shape = (64, 64, 1)
        X_test[i] = img.transpose(2, 0, 1)

    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1]
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }


def load_models(models_dir):
    """
    Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt)
    will be skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    models = {}
    for model_file in os.listdir(models_dir):
        with open(os.path.join(models_dir, model_file), 'rb') as f:
            try:
                models[model_file] = load_pickle(f)['model']
            except pickle.UnpicklingError:
                continue
    return models


def load_imagenet_val(num=None):
    """Load a handful of validation images from ImageNet.

    Inputs:
    - num: Number of images to load (max of 25)

    Returns:
    - X: numpy array with shape [num, 224, 224, 3]
    - y: numpy array of integer image labels, shape [num]
    - class_names: dict mapping integer label to class name
    """
    imagenet_fn = 'cs231n/datasets/imagenet_val_25.npz'
    if not os.path.isfile(imagenet_fn):
        print('file %s not found' % imagenet_fn)
        print('Run the following:')
        print('cd cs231n/datasets')
        print('bash get_imagenet_val.sh')
        assert False, 'Need to download imagenet_val_25.npz'
    f = np.load(imagenet_fn)
    X = f['X']
    y = f['y']
    class_names = f['label_map'].item()
    if num is not None:
        X = X[:num]
        y = y[:num]
    return X, y, class_names
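A minimal usage sketch for the loader above, assuming the CIFAR-10 archive has already been unpacked into cs231n/datasets/cifar-10-batches-py (the path hard-coded in get_CIFAR10_data; see the get_datasets.sh script added later in this diff). The import path scripts.data_utils is an assumption about where this file lives in the repo.

from scripts.data_utils import get_CIFAR10_data   # module path is an assumption

data = get_CIFAR10_data()
for name, arr in data.items():
    print(name, arr.shape)   # e.g. X_train -> (49000, 3, 32, 32) with the defaults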
@@ -0,0 +1,4 @@
# Get CIFAR10
wget http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -O cifar-10-python.tar.gz
tar -xzvf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz
@@ -0,0 +1,129 @@
from __future__ import print_function
from builtins import range
from past.builtins import xrange

import numpy as np
from random import randrange


def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    a naive implementation of numerical gradient of f at x
    - f should be a function that takes a single argument
    - x is the point (numpy array) to evaluate the gradient at
    """

    fx = f(x)  # evaluate function value at original point
    grad = np.zeros_like(x)
    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:

        # evaluate function at x+h
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # restore

        # compute the partial derivative with centered formula
        grad[ix] = (fxph - fxmh) / (2 * h)  # the slope
        if verbose:
            print(ix, grad[ix])
        it.iternext()  # step to next dimension

    return grad


def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval

        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad


def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input Blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size
    """
    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        it = np.nditer(input_blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            orig = input_blob.vals[idx]

            input_blob.vals[idx] = orig + h
            f(*(inputs + (output,)))
            pos = np.copy(output.vals)
            input_blob.vals[idx] = orig - h
            f(*(inputs + (output,)))
            neg = np.copy(output.vals)
            input_blob.vals[idx] = orig

            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(diff)
    return numeric_diffs


def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    return eval_numerical_gradient_blobs(lambda *args: net.forward(),
                                         inputs, output, h=h)


def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """
    sample a few random elements and only check the numerical gradient
    in these dimensions.
    """

    for i in range(num_checks):
        ix = tuple([randrange(m) for m in x.shape])

        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic)))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))
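A usage sketch for grad_check_sparse, wired up against svm_loss_naive from this same diff: the function is given a scalar loss as a function of W plus the analytic gradient, and it prints numerical versus analytic values at a few random entries. The import path scripts.gradient_check is an assumption; until the gradient TODO in svm_loss_naive is filled in, dW is all zeros and the check will report large relative errors.

import numpy as np
from scripts.classifiers.linear_svm import svm_loss_naive
from scripts.gradient_check import grad_check_sparse   # module path is an assumption

rng = np.random.default_rng(0)
W = 0.001 * rng.standard_normal((3073, 10))
X = rng.standard_normal((50, 3073))
y = rng.integers(0, 10, size=50)

loss, grad = svm_loss_naive(W, X, y, reg=0.0)
f = lambda w: svm_loss_naive(w, X, y, reg=0.0)[0]   # scalar loss as a function of W
grad_check_sparse(f, W, grad)                        # prints numerical vs. analytic values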
@@ -0,0 +1,73 @@
from builtins import range
from past.builtins import xrange

from math import sqrt, ceil
import numpy as np


def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound]
    - padding: The number of blank pixels between elements of the grid
    """
    (N, H, W, C) = Xs.shape
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in range(grid_size):
        x0, x1 = 0, W
        for x in range(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
                # grid[y0:y1, x0:x1] = Xs[next_idx]
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    # grid_max = np.max(grid)
    # grid_min = np.min(grid)
    # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
    return grid


def vis_grid(Xs):
    """ visualize a grid of images """
    (N, H, W, C) = Xs.shape
    A = int(ceil(sqrt(N)))
    G = np.ones((A*H+A, A*W+A, C), Xs.dtype)
    G *= np.min(Xs)
    n = 0
    for y in range(A):
        for x in range(A):
            if n < N:
                G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n, :, :, :]
                n += 1
    # normalize to [0,1]
    maxg = G.max()
    ming = G.min()
    G = (G - ming) / (maxg - ming)
    return G


def vis_nn(rows):
    """ visualize array of arrays of images """
    N = len(rows)
    D = len(rows[0])
    H, W, C = rows[0][0].shape
    Xs = rows[0][0]
    G = np.ones((N*H+N, D*W+D, C), Xs.dtype)
    for y in range(N):
        for x in range(D):
            G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
    # normalize to [0,1]
    maxg = G.max()
    ming = G.min()
    G = (G - ming) / (maxg - ming)
    return G
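A small usage sketch for visualize_grid: random arrays stand in for learned weights or CIFAR-10 samples of shape (N, H, W, C). The import path scripts.vis_utils is an assumption about where this file lives in the repo.

import numpy as np
import matplotlib.pyplot as plt
from scripts.vis_utils import visualize_grid   # module path is an assumption

Xs = np.random.rand(16, 32, 32, 3) * 255.0     # 16 fake 32x32 RGB "images"
grid = visualize_grid(Xs, ubound=255.0, padding=2)
plt.imshow(grid.astype('uint8'))
plt.axis('off')
plt.show()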