30 KiB
Лабораторная работа 4¶
Tensorflow 2.x
Подготовка данных
Использование Keras Model Subclassing API
Использование Keras Sequential + Functional API
Для выполнения лабораторной работы необходимо установить TensorFlow версии 2.0 или выше.
Рекомендуется использовать возможности Colab'а по обучению моделей на GPU.
import os import tensorflow as tf import numpy as np import math import timeit import matplotlib.pyplot as plt %matplotlib inline
Подготовка данных¶
Загрузите набор данных из предыдущей лабораторной работы.
def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    """
    Download CIFAR-10 through tf.keras and return normalized train/val/test splits.

    The images are cast to float32 and the labels to flat int32 vectors. The
    validation split is carved out of the official training set, right after
    the first `num_training` samples. Every split is standardized with the
    mean and std of the training split, reduced over axes (0, 1, 2).

    Inputs:
    - num_training: Number of leading training samples kept for training
    - num_validation: Number of samples following them used for validation
    - num_test: Number of leading test samples kept for testing

    Returns:
    A tuple (X_train, y_train, X_val, y_val, X_test, y_test).
    """
    # Fetch the raw arrays and bring them to the dtypes/shapes used downstream.
    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.cifar10.load_data()
    train_images = np.asarray(train_images, dtype=np.float32)
    train_labels = np.asarray(train_labels, dtype=np.int32).flatten()
    test_images = np.asarray(test_images, dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32).flatten()

    # Index ranges of the three subsets.
    val_idx = range(num_training, num_training + num_validation)
    train_idx = range(num_training)
    test_idx = range(num_test)

    X_val, y_val = train_images[val_idx], train_labels[val_idx]
    X_train, y_train = train_images[train_idx], train_labels[train_idx]
    X_test, y_test = test_images[test_idx], test_labels[test_idx]

    # Statistics come from the training subset only and are reused for the
    # validation and test subsets.
    reduce_axes = (0, 1, 2)
    mean_pixel = X_train.mean(axis=reduce_axes, keepdims=True)
    std_pixel = X_train.std(axis=reduce_axes, keepdims=True)
    X_train, X_val, X_test = (
        (split - mean_pixel) / std_pixel for split in (X_train, X_val, X_test)
    )

    return X_train, y_train, X_val, y_val, X_test, y_test

# If there are errors with SSL downloading involving self-signed certificates,
# it may be that your Python version was recently installed on the current machine.
# See: https://github.com/tensorflow/tensorflow/issues/10779
# To fix, run the command: /Applications/Python\ 3.7/Install\ Certificates.command
# ...replacing paths as necessary.

# Invoke the above function to get our data.
# Globals used by the cells below. Without them every `with tf.device(device)`
# block and the progress check inside train_part34 fail with NameError.
# Use the first GPU when TensorFlow can see one, otherwise fall back to the CPU.
device = '/device:GPU:0' if tf.config.experimental.list_physical_devices('GPU') else '/cpu:0'
# Number of training iterations between two validation reports in train_part34.
print_every = 100

NHW = (0, 1, 2)

# Load the preprocessed CIFAR-10 splits and report their shapes.
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape, y_train.dtype)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
class Dataset(object):
    """Iterable that yields (data, labels) minibatches from two NumPy arrays."""

    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y

        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        """
        Return an iterator over (X_batch, y_batch) pairs covering the data once.

        The last batch is smaller when the number of samples is not a multiple
        of batch_size. With shuffle=True a fresh random permutation is drawn
        on every call, so each epoch visits the samples in a different order.
        """
        N, B = self.X.shape[0], self.batch_size
        if not self.shuffle:
            # Plain slices: batches come out in storage order.
            return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))
        idxs = np.arange(N)
        np.random.shuffle(idxs)
        # Index through the permutation; slicing X directly would silently
        # ignore the shuffle.
        return iter((self.X[idxs[i:i+B]], self.y[idxs[i:i+B]]) for i in range(0, N, B))


train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=64)
# Peek at the first seven minibatches to see what the iterator yields.
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t >= 6:
        break
Keras Model Subclassing API¶
Для реализации собственной модели с помощью Keras Model Subclassing API необходимо выполнить следующие шаги:
Определить новый класс, который является наследником tf.keras.Model.
В методе __init__() определить все необходимые слои из модуля tf.keras.layers
Реализовать прямой проход в методе call() на основе слоев, объявленных в __init__()
Ниже приведен пример использования keras API для определения двухслойной полносвязной сети.
https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras
class TwoLayerFC(tf.keras.Model):
    """Fully connected classifier: Flatten -> Dense(relu) -> Dense(softmax)."""

    def __init__(self, hidden_size, num_classes):
        """
        Create the layers used by the forward pass.

        Inputs:
        - hidden_size: Number of units in the hidden Dense layer
        - num_classes: Number of units in the softmax output layer
        """
        super().__init__()
        # One variance-scaling initializer (scale=2.0) is shared by both kernels.
        weight_init = tf.initializers.VarianceScaling(scale=2.0)
        self.fc1 = tf.keras.layers.Dense(
            hidden_size, activation='relu', kernel_initializer=weight_init)
        self.fc2 = tf.keras.layers.Dense(
            num_classes, activation='softmax', kernel_initializer=weight_init)
        self.flatten = tf.keras.layers.Flatten()

    def call(self, x, training=False):
        """Flatten the input and return the softmax output of the second layer."""
        hidden = self.fc1(self.flatten(x))
        return self.fc2(hidden)


def test_TwoLayerFC():
    """Smoke test: run TwoLayerFC on an all-zero batch and print the output shape."""
    input_size, hidden_size, num_classes = 50, 42, 10
    dummy_batch = tf.zeros((64, input_size))
    net = TwoLayerFC(hidden_size, num_classes)
    with tf.device(device):
        out = net(dummy_batch)
        print(out.shape)


test_TwoLayerFC()
Реализуйте трехслойную CNN для вашей задачи классификации.
Архитектура сети:
- Сверточный слой (5 x 5 kernels, zero-padding = 'same')
- Функция активации ReLU
- Сверточный слой (3 x 3 kernels, zero-padding = 'same')
- Функция активации ReLU
- Полносвязный слой
- Функция активации Softmax
https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Conv2D
https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dense
class ThreeLayerConvNet(tf.keras.Model):
    """
    Exercise skeleton for a three-layer convolutional classifier.

    The assignment text asks for: conv 5x5 ('same' padding) -> ReLU ->
    conv 3x3 ('same' padding) -> ReLU -> fully connected layer -> softmax.
    Both methods are placeholders until the TODO sections are filled in.
    """

    def __init__(self, channel_1, channel_2, num_classes):
        """
        Create the layers of the network (to be implemented).

        Inputs:
        - channel_1: presumably the number of filters of the first conv layer
          -- TODO confirm when implementing
        - channel_2: presumably the number of filters of the second conv layer
          -- TODO confirm when implementing
        - num_classes: presumably the size of the softmax output -- TODO confirm
        """
        super(ThreeLayerConvNet, self).__init__()
        ########################################################################
        # TODO: Implement the __init__ method for a three-layer ConvNet. You   #
        # should instantiate layer objects to be used in the forward pass.     #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################

    def call(self, x, training=False):
        """
        Forward pass (to be implemented).

        Returns `scores`, which stays None until the TODO section assigns it.
        """
        scores = None
        ########################################################################
        # TODO: Implement the forward pass for a three-layer ConvNet. You      #
        # should use the layer objects defined in the __init__ method.         #
        ########################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ########################################################################
        #                           END OF YOUR CODE                           #
        ########################################################################
        return scores
def test_ThreeLayerConvNet():
    """
    Smoke test: run ThreeLayerConvNet on an all-zero batch and print the
    shape of the returned scores.
    """
    channel_1, channel_2, num_classes = 12, 8, 10
    model = ThreeLayerConvNet(channel_1, channel_2, num_classes)
    with tf.device(device):
        # Channels-last batch, matching the (N, 32, 32, 3) arrays produced by
        # load_cifar10 and the input_shape used by the other models here.
        x = tf.zeros((64, 32, 32, 3))
        scores = model(x)
        print(scores.shape)


test_ThreeLayerConvNet()
Пример реализации процесса обучения:
def _reset_metric(metric):
    """Clear a Keras metric, whichever reset method this Keras version provides."""
    # Newer Keras exposes reset_state(); early TF 2.x only has reset_states().
    reset = getattr(metric, 'reset_state', None) or metric.reset_states
    reset()


def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1, is_training=False):
    """
    Simple training loop for use with models defined using tf.keras. It trains
    a model for `num_epochs` epochs on the CIFAR-10 training set and checks
    accuracy on the CIFAR-10 validation set every `print_every` iterations.

    Inputs:
    - model_init_fn: A function that takes no parameters; when called it
      constructs the model we want to train: model = model_init_fn()
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for
    - is_training: Value passed as `training=` to the model during the
      training forward pass (validation always uses training=False)

    Reads the module-level globals `device`, `print_every`, `train_dset` and
    `val_dset`.

    Returns: Nothing, but prints progress during training
    """
    with tf.device(device):
        loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

        model = model_init_fn()
        optimizer = optimizer_init_fn()

        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

        val_loss = tf.keras.metrics.Mean(name='val_loss')
        val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

        t = 0
        for epoch in range(num_epochs):

            # Reset the metrics - https://www.tensorflow.org/alpha/guide/migration_guide#new-style_metrics
            _reset_metric(train_loss)
            _reset_metric(train_accuracy)

            for x_np, y_np in train_dset:
                # Only the forward pass and the loss need to be recorded.
                with tf.GradientTape() as tape:
                    scores = model(x_np, training=is_training)
                    loss = loss_fn(y_np, scores)

                # Differentiate and update outside the tape context so the
                # backward pass and the validation loop are not traced.
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(zip(gradients, model.trainable_variables))

                # Update the metrics
                train_loss.update_state(loss)
                train_accuracy.update_state(y_np, scores)

                if t % print_every == 0:
                    _reset_metric(val_loss)
                    _reset_metric(val_accuracy)
                    for test_x, test_y in val_dset:
                        # Validation always runs the model in inference mode.
                        prediction = model(test_x, training=False)
                        t_loss = loss_fn(test_y, prediction)

                        val_loss.update_state(t_loss)
                        val_accuracy.update_state(test_y, prediction)

                    template = 'Iteration {}, Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
                    print(template.format(t, epoch+1,
                                          train_loss.result(),
                                          train_accuracy.result()*100,
                                          val_loss.result(),
                                          val_accuracy.result()*100))
                t += 1
# Hyperparameters of the two-layer fully connected baseline.
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2


def model_init_fn():
    """Build a fresh TwoLayerFC with the hyperparameters defined above."""
    net = TwoLayerFC(hidden_size, num_classes)
    return net


def optimizer_init_fn():
    """Build a plain SGD optimizer using the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)
Обучите трехслойную CNN. В tf.keras.optimizers.SGD укажите Nesterov momentum = 0.9.
https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/optimizers/SGD
Значение accuracy на валидационной выборке после 1 эпохи обучения должно быть > 50%.
# Hyperparameters for the three-layer CNN exercise.
learning_rate = 3e-3
channel_1, channel_2, num_classes = 32, 16, 10

def model_init_fn():
    """Return the model to train; stays None until the TODO section is filled in."""
    model = None
    ############################################################################
    # TODO: Complete the implementation of model_init_fn.                      #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return model

def optimizer_init_fn():
    """Return the optimizer to use; stays None until the TODO section is filled in."""
    optimizer = None
    ############################################################################
    # TODO: Complete the implementation of optimizer_init_fn.                  #
    ############################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    pass

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    ############################################################################
    #                           END OF YOUR CODE                               #
    ############################################################################
    return optimizer

# Train with the two factories above (one epoch with the default arguments).
train_part34(model_init_fn, optimizer_init_fn)
Использование Keras Sequential API для реализации последовательных моделей.¶
Пример для полносвязной сети:
learning_rate = 1e-2


def model_init_fn():
    """Build the two-layer fully connected network with tf.keras.Sequential."""
    image_shape = (32, 32, 3)
    n_hidden, n_classes = 4000, 10
    weight_init = tf.initializers.VarianceScaling(scale=2.0)

    # Same architecture as TwoLayerFC, expressed as an ordered list of layers.
    flatten = tf.keras.layers.Flatten(input_shape=image_shape)
    hidden = tf.keras.layers.Dense(n_hidden, activation='relu',
                                   kernel_initializer=weight_init)
    output = tf.keras.layers.Dense(n_classes, activation='softmax',
                                   kernel_initializer=weight_init)
    return tf.keras.Sequential([flatten, hidden, output])


def optimizer_init_fn():
    """Build a plain SGD optimizer using the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)
Альтернативный менее гибкий способ обучения:
# Same model, trained through the built-in compile/fit/evaluate workflow
# instead of the hand-written loop.
model = model_init_fn()
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.sparse_categorical_accuracy],
)
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=1,
    validation_data=(X_val, y_val),
)
model.evaluate(x=X_test, y=y_test)
Перепишите реализацию трехслойной CNN с помощью tf.keras.Sequential API. Обучите модель двумя способами.
def model_init_fn(): model = None ############################################################################ # TODO: Construct a three-layer ConvNet using tf.keras.Sequential. # ############################################################################ # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** pass # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** ############################################################################ # END OF YOUR CODE # ############################################################################ return model learning_rate = 5e-4 def optimizer_init_fn(): optimizer = None ############################################################################ # TODO: Complete the implementation of model_fn. # ############################################################################ # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** pass # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** ############################################################################ # END OF YOUR CODE # ############################################################################ return optimizer train_part34(model_init_fn, optimizer_init_fn)
# Second training variant: the built-in compile/fit/evaluate workflow.
# NOTE(review): the optimizer is given as the string 'sgd', so neither
# learning_rate nor optimizer_init_fn from the previous cell is used here --
# confirm this is intended.
model = model_init_fn()
model.compile(
    optimizer='sgd',
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.sparse_categorical_accuracy],
)
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=1,
    validation_data=(X_val, y_val),
)
model.evaluate(x=X_test, y=y_test)
Использование Keras Functional API¶
Для реализации более сложных архитектур сети с несколькими входами/выходами, повторным использованием слоев, "остаточными" связями (residual connections) необходимо явно указать входные и выходные тензоры.
Ниже представлен пример для полносвязной сети.
def two_layer_fc_functional(input_shape, hidden_size, num_classes):
    """
    Build the two-layer fully connected network with the Keras Functional API.

    Inputs:
    - input_shape: Shape of one input sample (without the batch dimension)
    - hidden_size: Number of units in the hidden Dense layer
    - num_classes: Number of units in the softmax output layer

    Returns:
    A tf.keras.Model mapping the input tensor to the softmax scores.
    """
    weight_init = tf.initializers.VarianceScaling(scale=2.0)

    # Layers are applied to symbolic tensors one after another.
    inputs = tf.keras.Input(shape=input_shape)
    net = tf.keras.layers.Flatten()(inputs)
    net = tf.keras.layers.Dense(hidden_size, activation='relu',
                                kernel_initializer=weight_init)(net)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax',
                                    kernel_initializer=weight_init)(net)

    # The model is fully described by its input and output tensors.
    return tf.keras.Model(inputs=inputs, outputs=outputs)


def test_two_layer_fc_functional():
    """Smoke test: run the functional model on an all-zero batch and print the output shape."""
    input_size, hidden_size, num_classes = 50, 42, 10
    sample_shape = (50,)

    dummy_batch = tf.zeros((64, input_size))
    net = two_layer_fc_functional(sample_shape, hidden_size, num_classes)

    with tf.device(device):
        out = net(dummy_batch)
        print(out.shape)


test_two_layer_fc_functional()
# Hyperparameters of the functional two-layer network.
input_shape = (32, 32, 3)
hidden_size = 4000
num_classes = 10
learning_rate = 1e-2


def model_init_fn():
    """Build a fresh functional two-layer network with the settings above."""
    net = two_layer_fc_functional(input_shape, hidden_size, num_classes)
    return net


def optimizer_init_fn():
    """Build a plain SGD optimizer using the module-level learning rate."""
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    return sgd


train_part34(model_init_fn, optimizer_init_fn)
Поэкспериментируйте с архитектурой сверточной сети. Для вашего набора данных вам необходимо получить как минимум 70% accuracy на валидационной выборке за 10 эпох обучения. Опишите все эксперименты и сделайте выводы (без выполнения данного пункта работы приниматься не будут).
Экспериментируйте с архитектурой, гиперпараметрами, функцией потерь, регуляризацией, методом оптимизации.
https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/BatchNormalization#methods https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/layers/Dropout#methods
class CustomConvNet(tf.keras.Model):
    """Exercise skeleton for a free-form CIFAR-10 network; both methods are placeholders."""

    def __init__(self):
        """Create the layers of the network (to be implemented)."""
        super(CustomConvNet, self).__init__()
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

    def call(self, input_tensor, training=False):
        """
        Forward pass (to be implemented).

        NOTE(review): the method returns `x`, which is not bound anywhere in
        the template; the code added in the TODO section must assign it,
        otherwise calling the model raises NameError.
        """
        ############################################################################
        # TODO: Construct a model that performs well on CIFAR-10                   #
        ############################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

        pass

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        ############################################################################
        #                            END OF YOUR CODE                              #
        ############################################################################

        return x


# Report validation metrics every 700 iterations and train for 10 epochs.
print_every = 700
num_epochs = 10

# This instance is not passed to train_part34, which builds its own model
# through model_init_fn.
model = CustomConvNet()

def model_init_fn():
    """Build a fresh CustomConvNet."""
    return CustomConvNet()

def optimizer_init_fn():
    """Build an Adam optimizer with a learning rate of 1e-3."""
    learning_rate = 1e-3
    return tf.keras.optimizers.Adam(learning_rate)

# is_training=True makes the training forward pass call the model with training=True.
train_part34(model_init_fn, optimizer_init_fn, num_epochs=num_epochs, is_training=True)
Опишите все эксперименты, результаты. Сделайте выводы.