mirror of https://github.com/da0c/DL_Course_SamU
Add files via upload
parent 3708504db4
commit 18e1150b07
File diff suppressed because one or more lines are too long
@@ -0,0 +1,2 @@
from scripts.classifiers.k_nearest_neighbor import *
from scripts.classifiers.linear_classifier import *
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,183 @@
from builtins import range
from builtins import object
import numpy as np
from past.builtins import xrange


class KNearestNeighbor(object):
    """ a kNN classifier with L2 distance """

    def __init__(self):
        pass

    def train(self, X, y):
        """
        Train the classifier. For k-nearest neighbors this is just
        memorizing the training data.

        Inputs:
        - X: A numpy array of shape (num_train, D) containing the training data
          consisting of num_train samples each of dimension D.
        - y: A numpy array of shape (num_train,) containing the training labels,
          where y[i] is the label for X[i].
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """
        Predict labels for test data using this classifier.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data consisting
          of num_test samples each of dimension D.
        - k: The number of nearest neighbors that vote for the predicted labels.
        - num_loops: Determines which implementation to use to compute distances
          between training points and testing points.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].
        """
        if num_loops == 0:
            dists = self.compute_distances_no_loops(X)
        elif num_loops == 1:
            dists = self.compute_distances_one_loop(X)
        elif num_loops == 2:
            dists = self.compute_distances_two_loops(X)
        else:
            raise ValueError('Invalid value %d for num_loops' % num_loops)

        return self.predict_labels(dists, k=k)

    def compute_distances_two_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a nested loop over both the training data and the
        test data.

        Inputs:
        - X: A numpy array of shape (num_test, D) containing test data.

        Returns:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          is the Euclidean distance between the ith test point and the jth training
          point.
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            for j in range(num_train):
                #################################################################
                # TODO:
                # Compute the l2 distance between the ith test point and the jth
                # training point, and store the result in dists[i, j]. You should
                # not use a loop over dimension, nor use np.linalg.norm().
                #################################################################
                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

                pass

                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists

    def compute_distances_one_loop(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using a single loop over the test data.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        for i in range(num_test):
            #####################################################################
            # TODO:
            # Compute the l2 distance between the ith test point and all training
            # points, and store the result in dists[i, :].
            # Do not use np.linalg.norm().
            #####################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists

    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train))
        #########################################################################
        # TODO:
        # Compute the l2 distance between all test points and all training
        # points without using any explicit loops, and store the result in
        # dists.
        #
        # You should implement this function using only basic array operations;
        # in particular you should not use functions from scipy,
        # nor use np.linalg.norm().
        #
        # HINT: Try to formulate the l2 distance using matrix multiplication
        # and two broadcast sums.
        #########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return dists

    def predict_labels(self, dists, k=1):
        """
        Given a matrix of distances between test points and training points,
        predict a label for each test point.

        Inputs:
        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
          gives the distance between the ith test point and the jth training point.

        Returns:
        - y: A numpy array of shape (num_test,) containing predicted labels for the
          test data, where y[i] is the predicted label for the test point X[i].
        """
        num_test = dists.shape[0]
        y_pred = np.zeros(num_test)
        for i in range(num_test):
            # A list of length k storing the labels of the k nearest neighbors to
            # the ith test point.
            closest_y = []
            #####################################################################
            # TODO:
            # Use the distance matrix to find the k nearest neighbors of the ith
            # testing point, and use self.y_train to find the labels of these
            # neighbors. Store these labels in closest_y.
            # Hint: Look up the function numpy.argsort.
            #####################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
            #####################################################################
            # TODO:
            # Now that you have found the labels of the k nearest neighbors, you
            # need to find the most common label in the list closest_y of labels.
            # Store this label in y_pred[i]. Break ties by choosing the smaller
            # label.
            #####################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        return y_pred
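For reference, below is a minimal sketch of how the two most distinctive TODOs above (the fully vectorized L2 distance and the k-nearest-neighbor vote) might be completed. It is not the course's official solution; the standalone function names compute_distances_no_loops_sketch and predict_labels_sketch are illustrative assumptions, and labels are assumed to be non-negative integers, but the shapes and the tie-breaking rule follow the docstrings above.

import numpy as np

def compute_distances_no_loops_sketch(X, X_train):
    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, expanded with one matrix
    # multiplication and two broadcast sums, as the HINT above suggests.
    test_sq = np.sum(X ** 2, axis=1, keepdims=True)    # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)            # (num_train,)
    cross = X.dot(X_train.T)                           # (num_test, num_train)
    sq_dists = test_sq - 2.0 * cross + train_sq        # broadcasts to (num_test, num_train)
    return np.sqrt(np.maximum(sq_dists, 0.0))          # clamp tiny negative round-off

def predict_labels_sketch(dists, y_train, k=1):
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test)
    for i in range(num_test):
        # Labels of the k training points closest to the ith test point.
        closest_y = y_train[np.argsort(dists[i])[:k]]
        # Most common label; np.argmax over np.bincount returns the first
        # maximum, i.e. the smaller label on ties, as the docstring asks.
        y_pred[i] = np.argmax(np.bincount(closest_y))
    return y_pred

Taking the square root at the end keeps the interface identical to the looped versions, so the same voting routine works on the output of any of the three distance implementations.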
@@ -0,0 +1,139 @@
from __future__ import print_function

from builtins import range
from builtins import object
import numpy as np
from scripts.classifiers.linear_svm import *
from scripts.classifiers.softmax import *
from past.builtins import xrange


class LinearClassifier(object):

    def __init__(self):
        self.W = None

    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
              batch_size=200, verbose=False):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels; y[i] = c
          means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        A list containing the value of the loss function at each training iteration.
        """
        num_train, dim = X.shape
        num_classes = np.max(y) + 1  # assume y takes values 0...K-1 where K is number of classes
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # Run stochastic gradient descent to optimize W
        loss_history = []
        for it in range(num_iters):
            X_batch = None
            y_batch = None

            #####################################################################
            # TODO:
            # Sample batch_size elements from the training data and their
            # corresponding labels to use in this round of gradient descent.
            # Store the data in X_batch and their corresponding labels in
            # y_batch; after sampling X_batch should have shape (batch_size, dim)
            # and y_batch should have shape (batch_size,)
            #
            # Hint: Use np.random.choice to generate indices. Sampling with
            # replacement is faster than sampling without replacement.
            #####################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # evaluate loss and gradient
            loss, grad = self.loss(X_batch, y_batch, reg)
            loss_history.append(loss)

            # perform parameter update
            #####################################################################
            # TODO:
            # Update the weights using the gradient and the learning rate.
            #####################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

        return loss_history

    def predict(self, X):
        """
        Use the trained weights of this linear classifier to predict labels for
        data points.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.

        Returns:
        - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional
          array of length N, and each element is an integer giving the predicted
          class.
        """
        y_pred = np.zeros(X.shape[0])
        #########################################################################
        # TODO:
        # Implement this method. Store the predicted labels in y_pred.
        #########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        return y_pred

    def loss(self, X_batch, y_batch, reg):
        """
        Compute the loss function and its derivative.
        Subclasses will override this.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
        - reg: (float) regularization strength.

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W
        """
        pass


class LinearSVM(LinearClassifier):
    """ A subclass that uses the Multiclass SVM loss function """

    def loss(self, X_batch, y_batch, reg):
        return svm_loss_vectorized(self.W, X_batch, y_batch, reg)


class Softmax(LinearClassifier):
    """ A subclass that uses the Softmax + Cross-entropy loss function """

    def loss(self, X_batch, y_batch, reg):
        return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
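A possible shape for the minibatch-sampling and weight-update TODOs in LinearClassifier.train, plus the predict stub, written here as free functions rather than methods. The names sample_and_update_sketch, predict_sketch and the loss_fn callback are illustrative assumptions; inside the class the same lines would use self.W and self.loss.

import numpy as np

def sample_and_update_sketch(W, X, y, batch_size, learning_rate, loss_fn, reg):
    """One SGD iteration: sample a minibatch, then take a gradient step."""
    num_train = X.shape[0]
    # Sample with replacement, as the hint in train() suggests.
    idx = np.random.choice(num_train, batch_size, replace=True)
    X_batch, y_batch = X[idx], y[idx]
    loss, grad = loss_fn(W, X_batch, y_batch, reg)   # e.g. svm_loss_vectorized
    W -= learning_rate * grad                        # vanilla SGD update
    return W, loss

def predict_sketch(W, X):
    # Highest-scoring class for each row of X.
    return np.argmax(X.dot(W), axis=1)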
@@ -0,0 +1,100 @@
from builtins import range
import numpy as np
from random import shuffle
from past.builtins import xrange


def svm_loss_naive(W, X, y, reg):
    """
    Structured SVM loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    # compute the loss and the gradient
    num_classes = W.shape[1]
    num_train = X.shape[0]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_class_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_class_score + 1  # note delta = 1
            if margin > 0:
                loss += margin

    # Right now the loss is a sum over all training examples, but we want it
    # to be an average instead so we divide by num_train.
    loss /= num_train

    # Add regularization to the loss.
    loss += reg * np.sum(W * W)

    #############################################################################
    # TODO:
    # Compute the gradient of the loss function and store it in dW.
    # Rather than first computing the loss and then computing the derivative,
    # it may be simpler to compute the derivative at the same time that the
    # loss is being computed. As a result you may need to modify some of the
    # code above to compute the gradient.
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW


def svm_loss_vectorized(W, X, y, reg):
    """
    Structured SVM loss function, vectorized implementation.

    Inputs and outputs are the same as svm_loss_naive.
    """
    loss = 0.0
    dW = np.zeros(W.shape)  # initialize the gradient as zero

    #############################################################################
    # TODO:
    # Implement a vectorized version of the structured SVM loss, storing the
    # result in loss.
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    #############################################################################
    # TODO:
    # Implement a vectorized version of the gradient for the structured SVM
    # loss, storing the result in dW.
    #
    # Hint: Instead of computing the gradient from scratch, it may be easier
    # to reuse some of the intermediate values that you used to compute the
    # loss.
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
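As a reference only, here is a minimal sketch of a vectorized loss and gradient consistent with svm_loss_naive above (margin delta = 1, regularization reg * sum(W*W), so its gradient is 2 * reg * W). The function name svm_loss_vectorized_sketch is an illustrative assumption, not the template's function.

import numpy as np

def svm_loss_vectorized_sketch(W, X, y, reg):
    """Fully vectorized multiclass SVM loss and gradient."""
    num_train = X.shape[0]
    scores = X.dot(W)                                   # (N, C)
    correct = scores[np.arange(num_train), y][:, None]  # (N, 1)
    margins = np.maximum(0, scores - correct + 1.0)     # note delta = 1
    margins[np.arange(num_train), y] = 0                # no margin for the correct class
    loss = margins.sum() / num_train + reg * np.sum(W * W)

    # Each positive margin contributes +x_i to column j of dW and -x_i to the
    # correct-class column, so a 0/1 mask with row sums on the correct class
    # turns the gradient into a single matrix product.
    mask = (margins > 0).astype(float)
    mask[np.arange(num_train), y] = -mask.sum(axis=1)
    dW = X.T.dot(mask) / num_train + 2 * reg * W
    return loss, dW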
@@ -0,0 +1,225 @@
from __future__ import print_function

from builtins import range
from builtins import object
import numpy as np
import matplotlib.pyplot as plt
from past.builtins import xrange


class TwoLayerNet(object):
    """
    A two-layer fully-connected neural network. The net has an input dimension of
    N, a hidden layer dimension of H, and performs classification over C classes.
    We train the network with a softmax loss function and L2 regularization on the
    weight matrices. The network uses a ReLU nonlinearity after the first fully
    connected layer.

    In other words, the network has the following architecture:

    input - fully connected layer - ReLU - fully connected layer - softmax

    The outputs of the second fully-connected layer are the scores for each class.
    """

    def __init__(self, input_size, hidden_size, output_size, std=1e-4):
        """
        Initialize the model. Weights are initialized to small random values and
        biases are initialized to zero. Weights and biases are stored in the
        variable self.params, which is a dictionary with the following keys:

        W1: First layer weights; has shape (D, H)
        b1: First layer biases; has shape (H,)
        W2: Second layer weights; has shape (H, C)
        b2: Second layer biases; has shape (C,)

        Inputs:
        - input_size: The dimension D of the input data.
        - hidden_size: The number of neurons H in the hidden layer.
        - output_size: The number of classes C.
        """
        self.params = {}
        self.params['W1'] = std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def loss(self, X, y=None, reg=0.0):
        """
        Compute the loss and gradients for a two layer fully connected neural
        network.

        Inputs:
        - X: Input data of shape (N, D). Each X[i] is a training sample.
        - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
          an integer in the range 0 <= y[i] < C. This parameter is optional; if it
          is not passed then we only return scores, and if it is passed then we
          instead return the loss and gradients.
        - reg: Regularization strength.

        Returns:
        If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
        the score for class c on input X[i].

        If y is not None, instead return a tuple of:
        - loss: Loss (data loss and regularization loss) for this batch of training
          samples.
        - grads: Dictionary mapping parameter names to gradients of those parameters
          with respect to the loss function; has the same keys as self.params.
        """
        # Unpack variables from the params dictionary
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape

        # Compute the forward pass
        scores = None
        #########################################################################
        # TODO: Perform the forward pass, computing the class scores for the
        # input. Store the result in the scores variable, which should be an
        # array of shape (N, C).
        #########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # If the targets are not given then jump out, we're done
        if y is None:
            return scores

        # Compute the loss
        loss = None
        #########################################################################
        # TODO: Finish the forward pass, and compute the loss. This should
        # include both the data loss and L2 regularization for W1 and W2. Store
        # the result in the variable loss, which should be a scalar. Use the
        # Softmax classifier loss.
        #########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        # Backward pass: compute gradients
        grads = {}
        #########################################################################
        # TODO: Compute the backward pass, computing the derivatives of the
        # weights and biases. Store the results in the grads dictionary. For
        # example, grads['W1'] should store the gradient on W1, and be a matrix
        # of the same size.
        #########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        return loss, grads

    def train(self, X, y, X_val, y_val,
              learning_rate=1e-3, learning_rate_decay=0.95,
              reg=5e-6, num_iters=100,
              batch_size=200, verbose=False):
        """
        Train this neural network using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) giving training data.
        - y: A numpy array of shape (N,) giving training labels; y[i] = c means that
          X[i] has label c, where 0 <= c < C.
        - X_val: A numpy array of shape (N_val, D) giving validation data.
        - y_val: A numpy array of shape (N_val,) giving validation labels.
        - learning_rate: Scalar giving learning rate for optimization.
        - learning_rate_decay: Scalar giving factor used to decay the learning rate
          after each epoch.
        - reg: Scalar giving regularization strength.
        - num_iters: Number of steps to take when optimizing.
        - batch_size: Number of training examples to use per step.
        - verbose: boolean; if true print progress during optimization.
        """
        num_train = X.shape[0]
        iterations_per_epoch = max(num_train / batch_size, 1)

        # Use SGD to optimize the parameters in self.model
        loss_history = []
        train_acc_history = []
        val_acc_history = []

        for it in range(num_iters):
            X_batch = None
            y_batch = None

            #####################################################################
            # TODO: Create a random minibatch of training data and labels,
            # storing them in X_batch and y_batch respectively.
            #####################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            # Compute loss and gradients using the current minibatch
            loss, grads = self.loss(X_batch, y=y_batch, reg=reg)
            loss_history.append(loss)

            #####################################################################
            # TODO: Use the gradients in the grads dictionary to update the
            # parameters of the network (stored in the dictionary self.params)
            # using stochastic gradient descent. You'll need to use the gradients
            # stored in the grads dictionary defined above.
            #####################################################################
            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            pass

            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))

            # Every epoch, check train and val accuracy and decay learning rate.
            if it % iterations_per_epoch == 0:
                # Check accuracy
                train_acc = (self.predict(X_batch) == y_batch).mean()
                val_acc = (self.predict(X_val) == y_val).mean()
                train_acc_history.append(train_acc)
                val_acc_history.append(val_acc)

                # Decay learning rate
                learning_rate *= learning_rate_decay

        return {
            'loss_history': loss_history,
            'train_acc_history': train_acc_history,
            'val_acc_history': val_acc_history,
        }

    def predict(self, X):
        """
        Use the trained weights of this two-layer network to predict labels for
        data points. For each data point we predict scores for each of the C
        classes, and assign each data point to the class with the highest score.

        Inputs:
        - X: A numpy array of shape (N, D) giving N D-dimensional data points to
          classify.

        Returns:
        - y_pred: A numpy array of shape (N,) giving predicted labels for each of
          the elements of X. For all i, y_pred[i] = c means that X[i] is predicted
          to have class c, where 0 <= c < C.
        """
        y_pred = None

        #########################################################################
        # TODO: Implement this function; it should be VERY simple!
        #########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        return y_pred
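For orientation, a minimal sketch of the forward pass, softmax loss, and backward pass for the architecture described above (affine - ReLU - affine - softmax), written as a free function over a params dict shaped like self.params. The name two_layer_loss_sketch and the exact regularization convention (reg times the sum of squared weights, hence a 2 * reg * W gradient term) are assumptions for illustration, not the course's official solution.

import numpy as np

def two_layer_loss_sketch(params, X, y=None, reg=0.0):
    """Scores if y is None, otherwise (loss, grads) for a two-layer net."""
    W1, b1 = params['W1'], params['b1']
    W2, b2 = params['W2'], params['b2']
    N = X.shape[0]

    # Forward pass: affine - ReLU - affine.
    hidden = np.maximum(0, X.dot(W1) + b1)   # (N, H)
    scores = hidden.dot(W2) + b2             # (N, C)
    if y is None:
        return scores

    # Softmax data loss with a max shift for numerical stability,
    # plus L2 regularization on W1 and W2.
    shifted = scores - scores.max(axis=1, keepdims=True)
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    loss = -np.log(probs[np.arange(N), y]).mean()
    loss += reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

    # Backward pass through softmax, the second affine layer, the ReLU,
    # and the first affine layer.
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1
    dscores /= N
    grads = {}
    grads['W2'] = hidden.T.dot(dscores) + 2 * reg * W2
    grads['b2'] = dscores.sum(axis=0)
    dhidden = dscores.dot(W2.T)
    dhidden[hidden <= 0] = 0                 # ReLU passes gradient only where it fired
    grads['W1'] = X.T.dot(dhidden) + 2 * reg * W1
    grads['b1'] = dhidden.sum(axis=0)
    return loss, grads

With this layout, predict reduces to np.argmax over the returned scores along axis 1, which is all the "VERY simple" predict TODO asks for.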
@@ -0,0 +1,65 @@
from builtins import range
import numpy as np
from random import shuffle
from past.builtins import xrange


def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).

    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.

    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength

    Returns a tuple of:
    - loss as single float
    - gradient with respect to weights W; an array of same shape as W
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using explicit loops.
    # Store the loss in loss and the gradient in dW. If you are not careful
    # here, it is easy to run into numeric instability. Don't forget the
    # regularization!
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW


def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.

    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    #############################################################################
    # TODO: Compute the softmax loss and its gradient using no explicit loops.
    # Store the loss in loss and the gradient in dW. If you are not careful
    # here, it is easy to run into numeric instability. Don't forget the
    # regularization!
    #############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    return loss, dW
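A minimal sketch of the vectorized case, handling the numeric-instability warning above by shifting scores by their row-wise maximum before exponentiating. The name softmax_loss_vectorized_sketch is an illustrative assumption; the interface matches softmax_loss_naive, and the regularization follows the reg * sum(W*W) convention used in svm_loss_naive.

import numpy as np

def softmax_loss_vectorized_sketch(W, X, y, reg):
    """Vectorized softmax (cross-entropy) loss and gradient."""
    num_train = X.shape[0]
    scores = X.dot(W)                                  # (N, C)
    scores -= scores.max(axis=1, keepdims=True)        # numerical stability
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
    loss = -np.log(probs[np.arange(num_train), y]).mean() + reg * np.sum(W * W)

    # d(loss)/d(scores) = (probs - one_hot(y)) / N, then chain through X.
    dscores = probs.copy()
    dscores[np.arange(num_train), y] -= 1
    dW = X.T.dot(dscores) / num_train + 2 * reg * W
    return loss, dW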
Binary file not shown.