DL_Course_SamU/lab_1-2/scripts/gradient_check.py

from __future__ import print_function
from builtins import range
from past.builtins import xrange

import numpy as np
from random import randrange

def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    a naive implementation of numerical gradient of f at x

    Each element of x is nudged by +h and -h in turn and the centered
    difference (f(x + h) - f(x - h)) / (2 * h) is stored as the partial
    derivative for that element. f is called exactly twice per element.

    Inputs:
    - f: function that takes a single argument (the array x) and returns a
      value that fits into one element of the gradient
    - x: the point (numpy array) to evaluate the gradient at; it is modified
      in place while f is evaluated and each element is restored afterwards
    - verbose: if true, print every index together with its partial derivative
    - h: step size of the finite difference

    Returns:
    - grad: array built with np.zeros_like(x) holding the numerical gradient
    """
    grad = np.zeros_like(x)
    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:

        # evaluate function at x+h and x-h
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h # increment by h
        fxph = f(x) # evaluate f(x + h)
        x[ix] = oldval - h # decrement by h
        fxmh = f(x) # evaluate f(x - h)
        x[ix] = oldval # restore

        # compute the partial derivative with centered formula
        grad[ix] = (fxph - fxmh) / (2 * h) # the slope
        if verbose:
            print(ix, grad[ix])
        it.iternext() # step to next dimension

    return grad


def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Numerically differentiate an array-valued function f at x.

    For every element of x the function is evaluated at +h and -h around
    the current value; the difference of the two outputs is weighted by df,
    summed and divided by 2 * h.

    Inputs:
    - f: function that accepts a numpy array and returns a numpy array
    - x: numpy array to differentiate at; perturbed in place and restored
      element by element
    - df: weights multiplied with the output difference before summing
    - h: step size

    Returns:
    - array built with np.zeros_like(x) holding the numeric gradient
    """
    numeric_grad = np.zeros_like(x)
    two_h = 2 * h
    cursor = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    for _ in cursor:
        index = cursor.multi_index
        saved = x[index]

        # output with this element shifted up by h
        x[index] = saved + h
        f_plus = f(x).copy()
        # output with this element shifted down by h
        x[index] = saved - h
        f_minus = f(x).copy()
        # put the original value back before moving on
        x[index] = saved

        numeric_grad[index] = np.sum((f_plus - f_minus) * df) / two_h
    return numeric_grad


def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Numerically differentiate a function that works on blobs.

    f is called with all input blobs followed by the output blob, e.g.

    f(x, w, out)

    and is expected to leave its result in out.vals. Every element of every
    input blob's vals is shifted by +h and -h; the difference of the two
    resulting output.vals snapshots is weighted by output.diffs, summed and
    divided by 2 * h.

    Inputs:
    - f: function
    - inputs: tuple of input blobs (objects with vals and diffs arrays)
    - output: output blob (object with vals and diffs arrays)
    - h: step size

    Returns:
    - list with one array per input blob, each built with
      np.zeros_like(blob.diffs)
    """
    def run_forward():
        # run f on all blobs and snapshot what it left in the output blob
        f(*(inputs + (output,)))
        return np.copy(output.vals)

    gradients = []
    for blob in inputs:
        blob_grad = np.zeros_like(blob.diffs)
        cursor = np.nditer(blob.vals, flags=['multi_index'],
                           op_flags=['readwrite'])
        for _ in cursor:
            where = cursor.multi_index
            saved = blob.vals[where]

            blob.vals[where] = saved + h
            plus = run_forward()
            blob.vals[where] = saved - h
            minus = run_forward()
            # restore the element before moving to the next one
            blob.vals[where] = saved

            blob_grad[where] = np.sum((plus - minus) * output.diffs) / (2.0 * h)
        gradients.append(blob_grad)
    return gradients


def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a net by delegating to
    eval_numerical_gradient_blobs.

    Inputs:
    - net: object with a forward() method
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size

    Returns whatever eval_numerical_gradient_blobs returns.
    """
    def run_net(*_unused_blobs):
        # the blobs passed in are ignored; only net.forward() is invoked
        # (presumably the net already references these blobs - verify
        # against the net implementation)
        return net.forward()

    return eval_numerical_gradient_blobs(run_net, inputs, output, h=h)


def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """
    sample a few random elements of x and compare the numerical gradient
    with the analytic gradient in those dimensions only.

    Inputs:
    - f: function that takes x and returns a scalar
    - x: numpy array to check at; the sampled element is perturbed in place
      and reset after both evaluations
    - analytic_grad: array indexed like x holding the gradient to verify
    - num_checks: number of random elements to sample
    - h: step size of the centered difference

    Prints one line per check with the numerical value, the analytic value
    and their relative error. Returns nothing.
    """

    for _ in range(num_checks):
        ix = tuple([randrange(m) for m in x.shape])

        oldval = x[ix]
        x[ix] = oldval + h # increment by h
        fxph = f(x) # evaluate f(x + h)
        x[ix] = oldval - h # decrement by h
        fxmh = f(x) # evaluate f(x - h)
        x[ix] = oldval # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        abs_diff = abs(grad_numerical - grad_analytic)
        scale = abs(grad_numerical) + abs(grad_analytic)
        # both gradients exactly zero means they agree perfectly; report a
        # relative error of 0 instead of computing 0 / 0
        rel_error = abs_diff / scale if scale != 0 else 0.0
        print('numerical: %f analytic: %f, relative error: %e'
              %(grad_numerical, grad_analytic, rel_error))
add assignment 2 4 years ago			`from __future__ import print_function`
			`from builtins import range`
			`from past.builtins import xrange`

			`import numpy as np`
			`from random import randrange`

			`def eval_numerical_gradient(f, x, verbose=True, h=0.00001):`
			`"""`
			`a naive implementation of numerical gradient of f at x`
			`- f should be a function that takes a single argument`
			`- x is the point (numpy array) to evaluate the gradient at`
			`"""`

			`fx = f(x) # evaluate function value at original point`
			`grad = np.zeros_like(x)`
			`# iterate over all indexes in x`
			`it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])`
			`while not it.finished:`

			`# evaluate function at x+h`
			`ix = it.multi_index`
			`oldval = x[ix]`
			`x[ix] = oldval + h # increment by h`
			`fxph = f(x) # evalute f(x + h)`
			`x[ix] = oldval - h`
			`fxmh = f(x) # evaluate f(x - h)`
			`x[ix] = oldval # restore`

			`# compute the partial derivative with centered formula`
			`grad[ix] = (fxph - fxmh) / (2 * h) # the slope`
			`if verbose:`
			`print(ix, grad[ix])`
			`it.iternext() # step to next dimension`

			`return grad`


			`def eval_numerical_gradient_array(f, x, df, h=1e-5):`
			`"""`
			`Evaluate a numeric gradient for a function that accepts a numpy`
			`array and returns a numpy array.`
			`"""`
			`grad = np.zeros_like(x)`
			`it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])`
			`while not it.finished:`
			`ix = it.multi_index`

			`oldval = x[ix]`
			`x[ix] = oldval + h`
			`pos = f(x).copy()`
			`x[ix] = oldval - h`
			`neg = f(x).copy()`
			`x[ix] = oldval`

			`grad[ix] = np.sum((pos - neg) * df) / (2 * h)`
			`it.iternext()`
			`return grad`


			`def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):`
			`"""`
			`Compute numeric gradients for a function that operates on input`
			`and output blobs.`

			`We assume that f accepts several input blobs as arguments, followed by a`
			`blob where outputs will be written. For example, f might be called like:`

			`f(x, w, out)`

			`where x and w are input Blobs, and the result of f will be written to out.`

			`Inputs:`
			`- f: function`
			`- inputs: tuple of input blobs`
			`- output: output blob`
			`- h: step size`
			`"""`
			`numeric_diffs = []`
			`for input_blob in inputs:`
			`diff = np.zeros_like(input_blob.diffs)`
			`it = np.nditer(input_blob.vals, flags=['multi_index'],`
			`op_flags=['readwrite'])`
			`while not it.finished:`
			`idx = it.multi_index`
			`orig = input_blob.vals[idx]`

			`input_blob.vals[idx] = orig + h`
			`f(*(inputs + (output,)))`
			`pos = np.copy(output.vals)`
			`input_blob.vals[idx] = orig - h`
			`f(*(inputs + (output,)))`
			`neg = np.copy(output.vals)`
			`input_blob.vals[idx] = orig`

			`diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)`

			`it.iternext()`
			`numeric_diffs.append(diff)`
			`return numeric_diffs`


			`def eval_numerical_gradient_net(net, inputs, output, h=1e-5):`
			`return eval_numerical_gradient_blobs(lambda *args: net.forward(),`
			`inputs, output, h=h)`


			`def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):`
			`"""`
			`sample a few random elements and only return numerical`
			`in this dimensions.`
			`"""`

			`for i in range(num_checks):`
			`ix = tuple([randrange(m) for m in x.shape])`

			`oldval = x[ix]`
			`x[ix] = oldval + h # increment by h`
			`fxph = f(x) # evaluate f(x + h)`
			`x[ix] = oldval - h # increment by h`
			`fxmh = f(x) # evaluate f(x - h)`
			`x[ix] = oldval # reset`

			`grad_numerical = (fxph - fxmh) / (2 * h)`
			`grad_analytic = analytic_grad[ix]`
			`rel_error = (abs(grad_numerical - grad_analytic) /`
			`(abs(grad_numerical) + abs(grad_analytic)))`
			`print('numerical: %f analytic: %f, relative error: %e'`
			`%(grad_numerical, grad_analytic, rel_error))`