"""Finite-difference helpers for checking analytic gradients numerically."""
from __future__ import print_function

from builtins import range
from random import randrange

import numpy as np

try:
    from past.builtins import xrange
except ImportError:
    # `past` is provided by the third-party `future` package. Nothing in this
    # module needs it, so fall back to `range` instead of failing on import.
    xrange = range


def _multi_indices(arr):
    """Yield the multi-index of every element of ``arr`` in ``np.nditer`` order."""
    it = np.nditer(arr, flags=["multi_index"], op_flags=["readwrite"])
    while not it.finished:
        yield it.multi_index
        it.iternext()


def _eval_at_offsets(evaluate, arr, ix, h):
    """Return ``(evaluate() with arr[ix] + h, evaluate() with arr[ix] - h)``.

    ``arr`` is modified in place for the two evaluations. ``arr[ix]`` is put
    back to its original value before this function exits, including when
    ``evaluate`` raises, so the caller's array is never left perturbed.
    """
    oldval = arr[ix]
    try:
        arr[ix] = oldval + h
        plus = evaluate()
        arr[ix] = oldval - h
        minus = evaluate()
    finally:
        arr[ix] = oldval
    return plus, minus


def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """Compute the numerical gradient of ``f`` at ``x`` by centered differences.

    Inputs:
    - f: function that takes ``x`` as its single argument; its result is used
      as a scalar.
    - x: numpy array, the point at which to evaluate the gradient. It is
      perturbed in place one element at a time and restored afterwards.
    - verbose: if true, print each index together with its partial derivative.
    - h: step size.

    Returns an array shaped like ``x`` (created with ``np.zeros_like(x)``, so
    it also shares the dtype of ``x``) holding ``(f(x + h) - f(x - h)) / (2h)``
    for every element.
    """
    grad = np.zeros_like(x)

    def evaluate():
        return f(x)

    for ix in _multi_indices(x):
        fxph, fxmh = _eval_at_offsets(evaluate, x, ix, h)
        # Centered formula: the slope between the two perturbed evaluations.
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
    return grad


def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """Compute a numerical gradient for a function that maps an array to an array.

    Inputs:
    - f: function that accepts the numpy array ``x`` and returns a numpy array.
    - x: numpy array to differentiate with respect to. It is perturbed in
      place one element at a time and restored afterwards.
    - df: weights multiplied elementwise with the change in ``f``'s output
      before summing (typically the upstream gradient).
    - h: step size.

    Returns an array shaped like ``x`` whose entry at ``ix`` is
    ``sum((f(x + h) - f(x - h)) * df) / (2h)``.
    """
    grad = np.zeros_like(x)

    def evaluate():
        # Copy so the first result survives if f reuses its output buffer.
        return f(x).copy()

    for ix in _multi_indices(x):
        pos, neg = _eval_at_offsets(evaluate, x, ix, h)
        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
    return grad


def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """Compute numeric gradients for a function that operates on blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: sequence of input blobs (a tuple, or any other iterable); each
      must expose ``vals`` and ``diffs`` arrays
    - output: output blob; ``output.vals`` is read after each call to ``f``
      and ``output.diffs`` weights the output change
    - h: step size

    Returns a list with one array per input blob, shaped like that blob's
    ``diffs``.
    """
    inputs = tuple(inputs)
    # Build the argument tuple once; it is the same for every evaluation.
    call_args = inputs + (output,)

    def forward_copy():
        f(*call_args)
        # f writes into output.vals, so snapshot it before the next call.
        return np.copy(output.vals)

    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        for idx in _multi_indices(input_blob.vals):
            pos, neg = _eval_at_offsets(forward_copy, input_blob.vals, idx, h)
            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
        numeric_diffs.append(diff)
    return numeric_diffs


def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    """Compute numeric gradients of ``net`` with respect to its input blobs.

    Delegates to ``eval_numerical_gradient_blobs``, using ``net.forward()``
    (called without arguments) as the function under test.
    NOTE(review): this presumes ``net.forward()`` reads the given input blobs
    and writes into ``output`` itself; confirm against the net implementation.
    """
    return eval_numerical_gradient_blobs(
        lambda *args: net.forward(), inputs, output, h=h
    )


def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """Compare numerical and analytic gradients at a few random elements.

    For ``num_checks`` randomly chosen indices of ``x``, estimate the partial
    derivative of ``f`` with a centered difference and print it next to the
    corresponding entry of ``analytic_grad`` and their relative error.

    Inputs:
    - f: function that takes ``x`` as its single argument; its result is used
      as a scalar.
    - x: numpy array; perturbed in place and restored after each check.
    - analytic_grad: array indexed like ``x`` holding the analytic gradient.
    - num_checks: number of random elements to check.
    - h: step size.
    """

    def evaluate():
        return f(x)

    for _ in range(num_checks):
        ix = tuple(randrange(m) for m in x.shape)
        fxph, fxmh = _eval_at_offsets(evaluate, x, ix, h)

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        denom = abs(grad_numerical) + abs(grad_analytic)
        # Both gradients exactly zero means they agree; report 0, not 0/0.
        rel_error = abs(grad_numerical - grad_analytic) / denom if denom != 0 else 0.0
        print(
            "numerical: %f analytic: %f, relative error: %e"
            % (grad_numerical, grad_analytic, rel_error)
        )