Source code for autocpd.neuralnetwork

from pathlib import Path

import numpy as np
import tensorflow as tf
from keras import layers, losses, metrics, models


def general_simple_nn(n, l, m, num_classes, model_name="simple_nn"):
    """
    Construct a simple fully connected neural network.

    Parameters
    ----------
    n : scalar
        the input size
    l : scalar
        the number of hidden layers
    m : scalar or 1D array
        the width vector of the hidden layers; if it is a scalar, all
        hidden layers have the same number of nodes
    num_classes : scalar
        the number of nodes in the output layer, i.e., the number of classes
    model_name : str, optional
        the model name, by default "simple_nn"

    Returns
    -------
    model
        the simple neural network
    """
    input_layer = layers.Input(shape=(n,), name="Input")
    if isinstance(m, int):
        m_vec = np.repeat(m, l)
    elif len(m) == l:
        m_vec = m
    else:
        raise ValueError(
            "The length of the width vector must equal the number of hidden layers."
        )
    x = layers.Dense(m_vec[0], activation="relu", kernel_regularizer="l2")(input_layer)
    if l >= 2:
        for k in range(l - 1):
            x = layers.Dense(
                m_vec[k + 1], activation="relu", kernel_regularizer="l2"
            )(x)
    output_layer = layers.Dense(num_classes)(x)
    model = models.Model(input_layer, output_layer, name=model_name)
    return model

# mymodel = general_simple_nn(n=100, l=1, m=10, num_classes=2)
# mymodel = general_simple_nn(n=100, l=3, m=10, num_classes=2)
# mymodel = general_simple_nn(n=100, l=3, m=[20, 20, 5], num_classes=2)

# Build the model, train it and save it to disk.

def get_optimizer(learning_rate):
    """
    Get the Adam optimizer for a given learning rate.

    Parameters
    ----------
    learning_rate : float
        the initial learning rate for the inverse time decay schedule

    Returns
    -------
    optimizer
        the Adam optimizer with an inverse time decay learning-rate schedule
    """
    lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
        learning_rate, decay_steps=10000, decay_rate=1, staircase=False
    )
    return tf.keras.optimizers.Adam(lr_schedule)

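# A minimal sketch of how this schedule behaves (commented out, for
# illustration only): InverseTimeDecay yields
# learning_rate / (1 + decay_rate * step / decay_steps), so with the
# defaults above the rate halves after 10,000 steps.
# lr = tf.keras.optimizers.schedules.InverseTimeDecay(1e-3, 10000, 1)
# print(float(lr(0)))      # 0.001
# print(float(lr(10000)))  # 0.0005
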
def get_callbacks(name, log_dir, epochdots):
    """
    Get the training callbacks.

    This function assembles the callbacks used during training: an early
    stopping callback that halts training once the monitored validation
    loss stops improving, plus callbacks that log the training history to
    TensorBoard and to a CSV file.

    Parameters
    ----------
    name : str
        the model name
    log_dir : str
        the path of the log files
    epochdots : object
        the EpochDots object from tensorflow_docs

    Returns
    -------
    list
        the list of callbacks
    """
    name1 = name + "/log.csv"
    return [
        epochdots,
        tf.keras.callbacks.EarlyStopping(
            monitor="val_sparse_categorical_crossentropy", patience=800, min_delta=1e-3
        ),
        tf.keras.callbacks.TensorBoard(Path(log_dir, name)),
        tf.keras.callbacks.CSVLogger(Path(log_dir, name1)),
    ]

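# Note that EarlyStopping monitors "val_sparse_categorical_crossentropy",
# a metric that only exists because compile_and_fit below registers a
# SparseCategoricalCrossentropy metric under that exact name; with the
# large patience of 800 epochs it mainly guards against long plateaus.
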
def compile_and_fit(
    model,
    x_train,
    y_train,
    batch_size,
    lr,
    name,
    log_dir,
    epochdots,
    optimizer=None,
    validation_split=0.2,
    max_epochs=10000,
):
    """
    Compile and fit the model.

    Parameters
    ----------
    model : Models object
        the neural network to train
    x_train : tf.Tensor
        the tensor of training data
    y_train : tf.Tensor
        the tensor of training labels
    batch_size : int
        the batch size
    lr : float
        the learning rate
    name : str
        the model name
    log_dir : str
        the path of the log files
    epochdots : object
        the EpochDots object from tensorflow_docs
    optimizer : optimizer object or str, optional
        the optimizer, by default None; in that case an Adam optimizer
        with an inverse time decay schedule is constructed from ``lr``
    validation_split : float, optional
        the fraction of the training data held out for validation, by
        default 0.2
    max_epochs : int, optional
        the maximum number of epochs, by default 10000

    Returns
    -------
    model.fit object
        a fitted model object
    """
    if optimizer is None:
        optimizer = get_optimizer(lr)
    model.compile(
        optimizer=optimizer,
        loss=losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[
            metrics.SparseCategoricalCrossentropy(
                from_logits=True, name="sparse_categorical_crossentropy"
            ),
            "accuracy",
        ],
    )
    history = model.fit(
        x_train,
        y_train,
        epochs=max_epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        callbacks=get_callbacks(name, log_dir, epochdots),
        verbose=2,
    )
    return history

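# A minimal end-to-end sketch (commented out so the module stays
# import-safe). The synthetic data, the "logs" directory and the
# tensorflow_docs dependency are illustrative assumptions, not part of
# the package API:
# import tensorflow_docs.modeling as tfdocs_modeling
# x_train = np.random.randn(200, 100).astype("float32")
# y_train = np.random.randint(0, 2, size=200)
# model = general_simple_nn(n=100, l=2, m=16, num_classes=2)
# history = compile_and_fit(
#     model, x_train, y_train, batch_size=32, lr=1e-3,
#     name="simple_nn", log_dir="logs",
#     epochdots=tfdocs_modeling.EpochDots(), max_epochs=50,
# )
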
def resblock(x, kernel_size, filters, strides=1):
    """
    Construct a residual block.

    Parameters
    ----------
    x : tensor
        the input tensor
    kernel_size : int
        the kernel size
    filters : int
        the number of filters
    strides : int, optional
        the stride, by default 1

    Returns
    -------
    layer
        the output of the residual block
    """
    x1 = layers.Conv2D(filters, kernel_size, strides=strides, padding="same")(x)
    x1 = layers.BatchNormalization()(x1)
    x1 = layers.ReLU()(x1)
    x1 = layers.Conv2D(filters, kernel_size, padding="same")(x1)
    x1 = layers.BatchNormalization()(x1)
    if strides != 1:
        # Project the shortcut with a 1x1 convolution so that its shape
        # matches the strided main branch before the addition.
        x = layers.Conv2D(filters, 1, strides=strides, padding="same")(x)
        x = layers.BatchNormalization()(x)
    x1 = layers.Add()([x, x1])
    x1 = layers.ReLU()(x1)
    return x1

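# A quick shape check (illustrative only): a strided residual block halves
# the corresponding spatial dimension, and the shortcut is projected to match.
# inp = layers.Input(shape=(16, 64, 8))
# out = resblock(inp, kernel_size=3, filters=8, strides=(1, 2))
# print(out.shape)  # (None, 16, 32, 8)
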
def deep_nn(
    n,
    n_trans,
    kernel_size,
    n_filter,
    dropout_rate,
    n_classes,
    m,
    l,
    model_name="deep_nn",
):
    """
    Construct a deep neural network with 21 residual blocks.

    Parameters
    ----------
    n : int
        the length of the time series
    n_trans : int
        the number of transformations
    kernel_size : int
        the kernel size
    n_filter : int
        the number of filters
    dropout_rate : float
        the dropout rate
    n_classes : int
        the number of classes
    m : array
        the width vector of the dense layers
    l : int
        the number of dense layers
    model_name : str, optional
        the model name, by default "deep_nn"

    Returns
    -------
    model
        the deep neural network model
    """
    # Note: training this residual network can take several hours, even on
    # a GPU server.
    input_layer = layers.Input(shape=(n_trans, n), name="Input")
    x = layers.Reshape((n_trans, n, 1))(input_layer)
    x = layers.Conv2D(n_filter, 2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    # 21 residual blocks; the strided blocks halve the width dimension.
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter, strides=(1, 2))
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter, strides=(1, 2))
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter, strides=(1, 2))
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter, strides=(1, 2))
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = resblock(x, kernel_size, filters=n_filter)
    x = layers.GlobalAveragePooling2D()(x)
    for i in range(l - 1):
        x = layers.Dense(m[i], activation="relu", kernel_regularizer="l2")(x)
        x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(m[l - 1], activation="relu", kernel_regularizer="l2")(x)
    output_layer = layers.Dense(n_classes)(x)
    model = models.Model(input_layer, output_layer, name=model_name)
    return model

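# For example (illustrative parameter values, not package defaults):
# model = deep_nn(
#     n=100, n_trans=12, kernel_size=3, n_filter=16,
#     dropout_rate=0.3, n_classes=2, m=[50, 40, 30], l=3,
# )
# model.summary()
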
def general_deep_nn(
    n,
    n_trans,
    kernel_size,
    n_filter,
    dropout_rate,
    n_classes,
    n_resblock,
    m,
    l,
    model_name="deep_nn",
):
    """
    Construct a deep neural network with a configurable number of residual
    blocks.

    Parameters
    ----------
    n : int
        the length of the time series
    n_trans : int
        the number of transformations
    kernel_size : int
        the kernel size
    n_filter : int
        the number of filters
    dropout_rate : float
        the dropout rate
    n_classes : int
        the number of classes
    n_resblock : int
        the number of residual blocks
    m : array
        the width vector of the dense layers
    l : int
        the number of dense layers
    model_name : str, optional
        the model name, by default "deep_nn"

    Returns
    -------
    model
        the deep neural network model
    """
    # Note: training this residual network can take several hours, even on
    # a GPU server.
    input_layer = layers.Input(shape=(n_trans, n), name="Input")
    x = layers.Reshape((n_trans, n, 1))(input_layer)
    x = layers.Conv2D(n_filter, 2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    # The leftover blocks (n_resblock % 4) are plain residual blocks; the
    # rest are arranged in groups of four, each group starting with a
    # strided block that halves the width dimension.
    j1 = n_resblock % 4
    for _ in range(j1):
        x = resblock(x, kernel_size, filters=n_filter)
    j2 = n_resblock // 4
    if j2 > 0:
        for _ in range(j2):
            x = resblock(x, kernel_size, filters=n_filter, strides=(1, 2))
            x = resblock(x, kernel_size, filters=n_filter)
            x = resblock(x, kernel_size, filters=n_filter)
            x = resblock(x, kernel_size, filters=n_filter)
    x = layers.GlobalAveragePooling2D()(x)
    for i in range(l - 1):
        x = layers.Dense(m[i], activation="relu", kernel_regularizer="l2")(x)
        x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(m[l - 1], activation="relu", kernel_regularizer="l2")(x)
    output_layer = layers.Dense(n_classes)(x)
    model = models.Model(input_layer, output_layer, name=model_name)
    return model

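# For example (illustrative parameter values, not package defaults):
# n_resblock=9 yields one plain block followed by two groups of four.
# model = general_deep_nn(
#     n=100, n_trans=12, kernel_size=3, n_filter=16, dropout_rate=0.3,
#     n_classes=2, n_resblock=9, m=[50, 40, 30], l=3,
# )
# model.summary()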