athena__optimiser Module

Module containing implementations of optimisation methods

This module implements gradient-based optimisers for training neural networks by minimising loss functions through iterative parameter updates.

Implemented optimisers:

SGD (Stochastic Gradient Descent):
  $\theta_{t+1} = \theta_t - \eta \, \nabla L(\theta_t)$
  Simple, reliable baseline optimiser.

SGD with Momentum:
  $v_{t+1} = \mu \, v_t + \nabla L(\theta_t)$
  $\theta_{t+1} = \theta_t - \eta \, v_{t+1}$
  Accelerates convergence and dampens oscillations.

RMSprop:
  $s_{t+1} = \beta \, s_t + (1 - \beta) \, [\nabla L(\theta_t)]^2$
  $\theta_{t+1} = \theta_t - \eta \, \nabla L(\theta_t) / \sqrt{s_{t+1} + \epsilon}$
  Adapts the learning rate per parameter; good for non-stationary objectives.

Adagrad:
  $s_{t+1} = s_t + [\nabla L(\theta_t)]^2$
  $\theta_{t+1} = \theta_t - \eta \, \nabla L(\theta_t) / \sqrt{s_{t+1} + \epsilon}$
  Adapts the learning rate based on historical gradients.

Adam (Adaptive Moment Estimation):
  $m_{t+1} = \beta_1 \, m_t + (1 - \beta_1) \, \nabla L(\theta_t)$  [first moment]
  $v_{t+1} = \beta_2 \, v_t + (1 - \beta_2) \, [\nabla L(\theta_t)]^2$  [second moment]
  $\hat{m} = m_{t+1} / (1 - \beta_1^t)$, $\hat{v} = v_{t+1} / (1 - \beta_2^t)$  [bias correction]
  $\theta_{t+1} = \theta_t - \eta \, \hat{m} / (\sqrt{\hat{v}} + \epsilon)$
  Combines momentum and adaptive learning rates; the most popular choice.

L-BFGS (Limited-memory BFGS):
  Quasi-Newton method that approximates the inverse Hessian.
  Good for small- to medium-sized problems with smooth objectives.

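where $\eta$ (η) is the learning rate, $\mu$ (μ) is the momentum, $\beta$, $\beta_1$ and $\beta_2$ (β, β₁, β₂) are decay rates, $\epsilon$ (ε) is a small constant that guards against division by zero, and $\nabla L(\theta_t)$ is the gradient of the loss with respect to the parameters $\theta_t$.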

Attribution statement: This module is based on code from the neural-fortran library (https://github.com/modern-fortran/neural-fortran). The implementations of base_optimiser_type, sgd_optimiser_type, rmsprop_optimiser_type, adagrad_optimiser_type, and adam_optimiser_type are based on the corresponding types from neural-fortran.



Interfaces

public interface adagrad_optimiser_type

Interface for setting up the Adagrad optimiser

  • private module function optimiser_setup_adagrad(learning_rate, epsilon, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

    Set up the Adagrad optimiser

    Arguments

    Type IntentOptional Attributes Name
    real(kind=real32), intent(in), optional :: learning_rate

    Learning rate

    real(kind=real32), intent(in), optional :: epsilon

    Epsilon parameter

    integer, intent(in), optional :: num_params

    Number of parameters

    class(base_regulariser_type), intent(in), optional :: regulariser

    Regularisation method

    type(clip_type), intent(in), optional :: clip_dict

    Clipping dictionary

    class(base_lr_decay_type), intent(in), optional :: lr_decay

    Learning rate decay method

    Return Value type(adagrad_optimiser_type)

    Instance of the Adagrad optimiser

public interface adam_optimiser_type

Interface for setting up the Adam optimiser

  • private module function optimiser_setup_adam(learning_rate, beta1, beta2, epsilon, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

    Set up the Adam optimiser

    Arguments

    Type IntentOptional Attributes Name
    real(kind=real32), intent(in), optional :: learning_rate

    Learning rate

    real(kind=real32), intent(in), optional :: beta1

    Beta1 parameter

    real(kind=real32), intent(in), optional :: beta2

    Beta2 parameter

    real(kind=real32), intent(in), optional :: epsilon

    Epsilon parameter

    integer, intent(in), optional :: num_params

    Number of parameters

    class(base_regulariser_type), intent(in), optional :: regulariser

    Regularisation method

    type(clip_type), intent(in), optional :: clip_dict

    Clipping dictionary

    class(base_lr_decay_type), intent(in), optional :: lr_decay

    Learning rate decay method

    Return Value type(adam_optimiser_type)

    Instance of the Adam optimiser

public interface base_optimiser_type

Interface for setting up the base optimiser

  • private module function optimiser_setup_base(learning_rate, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

    Set up the base optimiser

    Arguments

    Type IntentOptional Attributes Name
    real(kind=real32), intent(in), optional :: learning_rate

    Learning rate

    integer, intent(in), optional :: num_params

    Number of parameters

    class(base_regulariser_type), intent(in), optional :: regulariser

    Regularisation method

    type(clip_type), intent(in), optional :: clip_dict

    Clipping dictionary

    class(base_lr_decay_type), intent(in), optional :: lr_decay

    Learning rate decay method

    Return Value type(base_optimiser_type)

    Instance of the base optimiser

public interface rmsprop_optimiser_type

Interface for setting up the RMSprop optimiser

  • private module function optimiser_setup_rmsprop(learning_rate, beta, epsilon, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

    Set up the RMSprop optimiser

    Arguments

    Type IntentOptional Attributes Name
    real(kind=real32), intent(in), optional :: learning_rate

    Learning rate

    real(kind=real32), intent(in), optional :: beta

    Beta parameter

    real(kind=real32), intent(in), optional :: epsilon

    Epsilon parameter

    integer, intent(in), optional :: num_params

    Number of parameters

    class(base_regulariser_type), intent(in), optional :: regulariser

    Regularisation method

    type(clip_type), intent(in), optional :: clip_dict

    Clipping dictionary

    class(base_lr_decay_type), intent(in), optional :: lr_decay

    Learning rate decay method

    Return Value type(rmsprop_optimiser_type)

    Instance of the RMSprop optimiser

public interface sgd_optimiser_type

Interface for setting up the SGD optimiser

  • private module function optimiser_setup_sgd(learning_rate, momentum, nesterov, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

    Set up the SGD optimiser

    Arguments

    Type IntentOptional Attributes Name
    real(kind=real32), intent(in), optional :: learning_rate

    Learning rate

    real(kind=real32), intent(in), optional :: momentum

    Fraction of momentum-based learning

    logical, intent(in), optional :: nesterov

    Nesterov momentum

    integer, intent(in), optional :: num_params

    Number of parameters

    class(base_regulariser_type), intent(in), optional :: regulariser

    Regularisation method

    type(clip_type), intent(in), optional :: clip_dict

    Clipping dictionary

    class(base_lr_decay_type), intent(in), optional :: lr_decay

    Learning rate decay method

    Return Value type(sgd_optimiser_type)

    Instance of the SGD optimiser


Derived Types

type, public, extends(base_optimiser_type) ::  adagrad_optimiser_type

Adagrad optimiser type

Components

Type Visibility Attributes Name Initial
type(clip_type), public :: clip_dict

Clipping dictionary

integer, public :: epoch = 0

Epoch number

real(kind=real32), public :: epsilon = 1.E-8_real32

Epsilon parameter

integer, public :: iter = 0

Iteration number

real(kind=real32), public :: learning_rate = 0.01_real32

Learning rate hyperparameter

class(base_lr_decay_type), public, allocatable :: lr_decay

Learning rate decay method

character(len=20), public :: name

Name of the optimiser

logical, public :: regularisation = .false.

Apply regularisation

class(base_regulariser_type), public, allocatable :: regulariser

Regularisation method

real(kind=real32), public, allocatable, dimension(:) :: sum_squares

Sum of squares of gradients

Constructor

Interface for setting up the Adagrad optimiser

private module function optimiser_setup_adagrad (learning_rate, epsilon, num_params, regulariser, clip_dict, lr_decay)

Set up the Adagrad optimiser

Type-Bound Procedures

procedure, public, pass(this) :: init => init_base

Initialise base optimiser

procedure, public, pass(this) :: init_gradients => init_gradients_adagrad

Initialise gradients for Adagrad

procedure, public, pass(this) :: minimise => minimise_adagrad

Apply gradients to parameters to minimise loss using Adagrad optimiser

procedure, public, pass(this) :: print_to_unit => print_to_unit_base

Print base optimiser information

procedure, public, pass(this) :: read => read_base

Read base optimiser information

type, public, extends(base_optimiser_type) ::  adam_optimiser_type

Adam optimiser type

Components

Type Visibility Attributes Name Initial
real(kind=real32), public :: beta1 = 0.9_real32

Beta1 parameter

real(kind=real32), public :: beta2 = 0.999_real32

Beta2 parameter

type(clip_type), public :: clip_dict

Clipping dictionary

integer, public :: epoch = 0

Epoch number

real(kind=real32), public :: epsilon = 1.E-8_real32

Epsilon parameter

integer, public :: iter = 0

Iteration number

real(kind=real32), public :: learning_rate = 0.01_real32

Learning rate hyperparameter

class(base_lr_decay_type), public, allocatable :: lr_decay

Learning rate decay method

real(kind=real32), public, allocatable, dimension(:) :: m

First moment estimate

character(len=20), public :: name

Name of the optimiser

logical, public :: regularisation = .false.

Apply regularisation

class(base_regulariser_type), public, allocatable :: regulariser

Regularisation method

real(kind=real32), public, allocatable, dimension(:) :: v

Second moment estimate

Constructor

Interface for setting up the Adam optimiser

private module function optimiser_setup_adam (learning_rate, beta1, beta2, epsilon, num_params, regulariser, clip_dict, lr_decay)

Set up the Adam optimiser

Type-Bound Procedures

procedure, public, pass(this) :: init => init_base

Initialise base optimiser

procedure, public, pass(this) :: init_gradients => init_gradients_adam

Initialise gradients for Adam

procedure, public, pass(this) :: minimise => minimise_adam

Apply gradients to parameters to minimise loss using Adam optimiser

procedure, public, pass(this) :: print_to_unit => print_to_unit_base

Print base optimiser information

procedure, public, pass(this) :: read => read_base

Read base optimiser information

type, public ::  base_optimiser_type

Base optimiser type

Components

Type Visibility Attributes Name Initial
type(clip_type), public :: clip_dict

Clipping dictionary

integer, public :: epoch = 0

Epoch number

integer, public :: iter = 0

Iteration number

real(kind=real32), public :: learning_rate = 0.01_real32

Learning rate hyperparameter

class(base_lr_decay_type), public, allocatable :: lr_decay

Learning rate decay method

character(len=20), public :: name

Name of the optimiser

logical, public :: regularisation = .false.

Apply regularisation

class(base_regulariser_type), public, allocatable :: regulariser

Regularisation method

Constructor

Interface for setting up the base optimiser

private module function optimiser_setup_base (learning_rate, num_params, regulariser, clip_dict, lr_decay)

Set up the base optimiser

Type-Bound Procedures

procedure, public, pass(this) :: init => init_base

Initialise base optimiser

procedure, public, pass(this) :: init_gradients => init_gradients_base

Initialise gradients

procedure, public, pass(this) :: minimise => minimise_base

Apply gradients to parameters to minimise loss using base optimiser

procedure, public, pass(this) :: print_to_unit => print_to_unit_base

Print base optimiser information

procedure, public, pass(this) :: read => read_base

Read base optimiser information

type, public, extends(base_optimiser_type) ::  rmsprop_optimiser_type

RMSprop optimiser type

Components

Type Visibility Attributes Name Initial
real(kind=real32), public :: beta = 0._real32

Beta parameter

type(clip_type), public :: clip_dict

Clipping dictionary

integer, public :: epoch = 0

Epoch number

real(kind=real32), public :: epsilon = 1.E-8_real32

Epsilon parameter

integer, public :: iter = 0

Iteration number

real(kind=real32), public :: learning_rate = 0.01_real32

Learning rate hyperparameter

class(base_lr_decay_type), public, allocatable :: lr_decay

Learning rate decay method

real(kind=real32), public, allocatable, dimension(:) :: moving_avg

Moving average

character(len=20), public :: name

Name of the optimiser

logical, public :: regularisation = .false.

Apply regularisation

class(base_regulariser_type), public, allocatable :: regulariser

Regularisation method

Constructor

Interface for setting up the RMSprop optimiser

private module function optimiser_setup_rmsprop (learning_rate, beta, epsilon, num_params, regulariser, clip_dict, lr_decay)

Set up the RMSprop optimiser

Type-Bound Procedures

procedure, public, pass(this) :: init => init_base

Initialise base optimiser

procedure, public, pass(this) :: init_gradients => init_gradients_rmsprop

Initialise gradients for RMSprop

procedure, public, pass(this) :: minimise => minimise_rmsprop

Apply gradients to parameters to minimise loss using RMSprop optimiser

procedure, public, pass(this) :: print_to_unit => print_to_unit_base

Print base optimiser information

procedure, public, pass(this) :: read => read_base

Read base optimiser information

type, public, extends(base_optimiser_type) ::  sgd_optimiser_type

Stochastic gradient descent optimiser type

Components

Type Visibility Attributes Name Initial
type(clip_type), public :: clip_dict

Clipping dictionary

integer, public :: epoch = 0

Epoch number

integer, public :: iter = 0

Iteration number

real(kind=real32), public :: learning_rate = 0.01_real32

Learning rate hyperparameter

class(base_lr_decay_type), public, allocatable :: lr_decay

Learning rate decay method

real(kind=real32), public :: momentum = 0._real32

Fraction of momentum-based learning

character(len=20), public :: name

Name of the optimiser

logical, public :: nesterov = .false.

Nesterov momentum

logical, public :: regularisation = .false.

Apply regularisation

class(base_regulariser_type), public, allocatable :: regulariser

Regularisation method

real(kind=real32), public, allocatable, dimension(:) :: velocity

Velocity for momentum

Constructor

Interface for setting up the SGD optimiser

private module function optimiser_setup_sgd (learning_rate, momentum, nesterov, num_params, regulariser, clip_dict, lr_decay)

Set up the SGD optimiser

Type-Bound Procedures

procedure, public, pass(this) :: init => init_base

Initialise base optimiser

procedure, public, pass(this) :: init_gradients => init_gradients_sgd

Initialise gradients for SGD

procedure, public, pass(this) :: minimise => minimise_sgd

Apply gradients to parameters to minimise loss using SGD optimiser

procedure, public, pass(this) :: print_to_unit => print_to_unit_base

Print base optimiser information

procedure, public, pass(this) :: read => read_base

Read base optimiser information


Functions

private module function optimiser_setup_adagrad(learning_rate, epsilon, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

Set up the Adagrad optimiser

Arguments

Type IntentOptional Attributes Name
real(kind=real32), intent(in), optional :: learning_rate

Learning rate

real(kind=real32), intent(in), optional :: epsilon

Epsilon

integer, intent(in), optional :: num_params

Number of parameters

class(base_regulariser_type), intent(in), optional :: regulariser

Regularisation method

type(clip_type), intent(in), optional :: clip_dict

Clipping dictionary

class(base_lr_decay_type), intent(in), optional :: lr_decay

Learning rate decay method

Return Value type(adagrad_optimiser_type)

Instance of the Adagrad optimiser

private module function optimiser_setup_adam(learning_rate, beta1, beta2, epsilon, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

Set up the Adam optimiser

Arguments

Type IntentOptional Attributes Name
real(kind=real32), intent(in), optional :: learning_rate

Learning rate

real(kind=real32), intent(in), optional :: beta1

Beta1 parameter

real(kind=real32), intent(in), optional :: beta2

Beta2 parameter

real(kind=real32), intent(in), optional :: epsilon

Epsilon parameter

integer, intent(in), optional :: num_params

Number of parameters

class(base_regulariser_type), intent(in), optional :: regulariser

Regularisation method

type(clip_type), intent(in), optional :: clip_dict

Clipping dictionary

class(base_lr_decay_type), intent(in), optional :: lr_decay

Learning rate decay method

Return Value type(adam_optimiser_type)

Instance of the Adam optimiser

private module function optimiser_setup_base(learning_rate, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

Set up the base optimiser

Arguments

Type IntentOptional Attributes Name
real(kind=real32), intent(in), optional :: learning_rate

Learning rate

integer, intent(in), optional :: num_params

Number of parameters

class(base_regulariser_type), intent(in), optional :: regulariser

Regularisation method

type(clip_type), intent(in), optional :: clip_dict

Clipping dictionary

class(base_lr_decay_type), intent(in), optional :: lr_decay

Learning rate decay method

Return Value type(base_optimiser_type)

Instance of the base optimiser

private module function optimiser_setup_rmsprop(learning_rate, beta, epsilon, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

Set up the RMSprop optimiser

Arguments

Type IntentOptional Attributes Name
real(kind=real32), intent(in), optional :: learning_rate

Learning rate

real(kind=real32), intent(in), optional :: beta

Beta parameter

real(kind=real32), intent(in), optional :: epsilon

Epsilon parameter

integer, intent(in), optional :: num_params

Number of parameters

class(base_regulariser_type), intent(in), optional :: regulariser

Regularisation method

type(clip_type), intent(in), optional :: clip_dict

Clipping dictionary

class(base_lr_decay_type), intent(in), optional :: lr_decay

Learning rate decay method

Return Value type(rmsprop_optimiser_type)

Instance of the RMSprop optimiser

private module function optimiser_setup_sgd(learning_rate, momentum, nesterov, num_params, regulariser, clip_dict, lr_decay) result(optimiser)

Set up the SGD optimiser

Arguments

Type IntentOptional Attributes Name
real(kind=real32), intent(in), optional :: learning_rate

Learning rate

real(kind=real32), intent(in), optional :: momentum

Fraction of momentum-based learning

logical, intent(in), optional :: nesterov

Nesterov momentum

integer, intent(in), optional :: num_params

Number of parameters

class(base_regulariser_type), intent(in), optional :: regulariser

Regularisation method

type(clip_type), intent(in), optional :: clip_dict

Clipping dictionary

class(base_lr_decay_type), intent(in), optional :: lr_decay

Learning rate decay method

Return Value type(sgd_optimiser_type)

Instance of the SGD optimiser


Subroutines

private subroutine init_base(this, num_params, regulariser, clip_dict)

Initialise base optimiser

Arguments

Type IntentOptional Attributes Name
class(base_optimiser_type), intent(inout) :: this

Instance of the base optimiser

integer, intent(in) :: num_params

Number of parameters

class(base_regulariser_type), intent(in), optional :: regulariser

Regularisation method

type(clip_type), intent(in), optional :: clip_dict

Clipping dictionary

private pure subroutine init_gradients_adagrad(this, num_params)

Initialise gradients for Adagrad optimiser

Arguments

Type IntentOptional Attributes Name
class(adagrad_optimiser_type), intent(inout) :: this

Instance of the Adagrad optimiser

integer, intent(in) :: num_params

Number of parameters

private pure subroutine init_gradients_adam(this, num_params)

Initialise gradients for Adam optimiser

Arguments

Type IntentOptional Attributes Name
class(adam_optimiser_type), intent(inout) :: this

Instance of the Adam optimiser

integer, intent(in) :: num_params

Number of parameters

private pure subroutine init_gradients_base(this, num_params)

Initialise gradients for base optimiser

Arguments

Type IntentOptional Attributes Name
class(base_optimiser_type), intent(inout) :: this

Instance of the base optimiser

integer, intent(in) :: num_params

Number of parameters

private pure subroutine init_gradients_rmsprop(this, num_params)

Initialise gradients for RMSprop optimiser

Arguments

Type IntentOptional Attributes Name
class(rmsprop_optimiser_type), intent(inout) :: this

Instance of the RMSprop optimiser

integer, intent(in) :: num_params

Number of parameters

private pure subroutine init_gradients_sgd(this, num_params)

Initialise gradients for SGD optimiser

Arguments

Type IntentOptional Attributes Name
class(sgd_optimiser_type), intent(inout) :: this

Instance of the SGD optimiser

integer, intent(in) :: num_params

Number of parameters

private pure subroutine minimise_adagrad(this, param, gradient)

Apply gradients to parameters to minimise loss using Adagrad optimiser

Arguments

Type IntentOptional Attributes Name
class(adagrad_optimiser_type), intent(inout) :: this

Instance of the Adagrad optimiser

real(kind=real32), intent(inout), dimension(:) :: param

Parameters

real(kind=real32), intent(inout), dimension(:) :: gradient

Gradients

private pure subroutine minimise_adam(this, param, gradient)

Apply gradients to parameters to minimise loss using Adam optimiser

Arguments

Type IntentOptional Attributes Name
class(adam_optimiser_type), intent(inout) :: this

Instance of the Adam optimiser

real(kind=real32), intent(inout), dimension(:) :: param

Parameters

real(kind=real32), intent(inout), dimension(:) :: gradient

Gradients

private pure subroutine minimise_base(this, param, gradient)

Apply gradients to parameters to minimise loss using base optimiser

Arguments

Type IntentOptional Attributes Name
class(base_optimiser_type), intent(inout) :: this

Instance of the base optimiser

real(kind=real32), intent(inout), dimension(:) :: param

Parameters

real(kind=real32), intent(inout), dimension(:) :: gradient

Gradients

private pure subroutine minimise_rmsprop(this, param, gradient)

Apply gradients to parameters to minimise loss using RMSprop optimiser

Arguments

Type IntentOptional Attributes Name
class(rmsprop_optimiser_type), intent(inout) :: this

Instance of the RMSprop optimiser

real(kind=real32), intent(inout), dimension(:) :: param

Parameters

real(kind=real32), intent(inout), dimension(:) :: gradient

Gradients

private pure subroutine minimise_sgd(this, param, gradient)

Apply gradients to parameters to minimise loss using SGD optimiser. Adaptive learning method.

Arguments

Type IntentOptional Attributes Name
class(sgd_optimiser_type), intent(inout) :: this

Instance of the SGD optimiser

real(kind=real32), intent(inout), dimension(:) :: param

Parameters

real(kind=real32), intent(inout), dimension(:) :: gradient

Gradients

private subroutine print_to_unit_base(this, unit)

Print base optimiser information

Arguments

Type IntentOptional Attributes Name
class(base_optimiser_type), intent(in) :: this

Instance of the base optimiser

integer, intent(in) :: unit

File unit

private subroutine read_base(this, unit)

Read base optimiser information

Arguments

Type IntentOptional Attributes Name
class(base_optimiser_type), intent(inout) :: this

Instance of the base optimiser

integer, intent(in) :: unit

File unit