get_partial_gno_kernel_params

pure subroutine get_partial_gno_kernel_params_val(this, upstream_grad, output)

In-place gradient w.r.t. packed kernel params

Accumulate gradients over all edges. For each edge e, with hidden = relu(U dx + b_u) and g_h = relu_mask * (V^T @ upstream_e), the contributions are: d(kappa_e)/dU -> g_h outer dx; d(kappa_e)/db_u -> g_h; d(kappa_e)/dV -> upstream_e outer hidden; d(kappa_e)/db_v -> upstream_e directly.

Arguments

Type	Intent	Attributes		Name
class(array_type),	intent(in)		::	this	Forward result node containing saved operands
real(kind=real32),	intent(in),	dimension(:,:)	::	upstream_grad	Upstream gradient values
real(kind=real32),	intent(out),	dimension(:,:)	::	output	Output gradient values for packed kernel parameters

Source Code

  pure subroutine get_partial_gno_kernel_params_val( &
       this, upstream_grad, output)
    !! Gradient w.r.t. the packed kernel parameters, written to `output`
    !!
    !! The packed parameter vector (column 1 of `this%right_operand%val`)
    !! is read as the consecutive blocks
    !!   [ U(H,d) | b_u(H) | V(F,H) | b_v(F) ]
    !! with U and V stored column-major. `output(:,1)` uses the same layout.
    !!
    !! For every edge e (one column of `this%left_operand%val`) the hidden
    !! layer is recomputed and the contributions are accumulated:
    !!   hidden      = relu(U dx + b_u)
    !!   grad_hidden = relu_mask * (V^T upstream(:,e))
    !!   grad_U     += grad_hidden outer dx
    !!   grad_b_u   += grad_hidden
    !!   grad_V     += upstream(:,e) outer hidden
    !!   grad_b_v   += upstream(:,e)
    implicit none

    ! Arguments
    class(array_type), intent(in) :: this
    !! Forward result node containing saved operands
    real(real32), dimension(:,:), intent(in)  :: upstream_grad
    !! Upstream gradient values; column e is used for edge e
    real(real32), dimension(:,:), intent(out) :: output
    !! Output gradient values for packed kernel parameters
    !! (zeroed on entry; only column 1 receives contributions)

    ! Local variables
    integer :: d, H, F, num_e, e, k, col, lo
    !! Unpacked dimensions, loop indices and start offset of a column
    integer :: off_U, off_bu, off_V, off_bv
    !! Flat offsets for packed kernel parameter blocks
    real(real32), allocatable :: U(:,:), b_u(:), V(:,:)
    !! Unpacked kernel parameter tensors
    real(real32), allocatable :: dx(:), pre_act(:), hidden(:)
    !! Per-edge buffers for input and activations
    real(real32), allocatable :: grad_hidden(:)  ! [H]
    !! Hidden-layer gradient buffer

    d = this%indices(1)
    H = this%indices(2)
    F = this%indices(3) * this%indices(4)
    num_e = size(this%left_operand%val, 2)

    ! Zero-based start of each block inside the packed vector
    off_U  = 0
    off_bu = H * d
    off_V  = off_bu + H
    off_bv = off_V + F * H

    ! Unpack the parameters needed to replay the forward pass
    ! (b_v itself is not needed for any of the gradients)
    allocate(U(H, d), b_u(H), V(F, H))
    U   = reshape(this%right_operand%val(off_U+1:off_bu, 1), [H, d])
    b_u = this%right_operand%val(off_bu+1:off_V, 1)
    V   = reshape(this%right_operand%val(off_V+1:off_bv, 1), [F, H])

    allocate(dx(d), pre_act(H), hidden(H), grad_hidden(H))

    output = 0.0_real32

    do e = 1, num_e
       ! Replay the hidden layer for this edge
       dx = this%left_operand%val(:, e)
       pre_act = matmul(U, dx) + b_u
       hidden = max(pre_act, 0.0_real32)

       ! --- d/d(b_v): upstream_grad(:,e) directly ---
       ! Explicit upper bound keeps the section at exactly F elements even
       ! if `output` has more rows than the packed parameter vector.
       output(off_bv+1:off_bv+F, 1) = output(off_bv+1:off_bv+F, 1) + &
            upstream_grad(:, e)

       ! --- d/dV: grad_V(:,k) += upstream(:,e) * hidden(k) ---
       ! One contiguous column of V (F entries) per hidden unit k
       do k = 1, H
          lo = off_V + (k-1)*F
          output(lo+1:lo+F, 1) = output(lo+1:lo+F, 1) + &
               upstream_grad(:, e) * hidden(k)
       end do

       ! --- Backprop through relu: V^T @ upstream(:,e), masked ---
       ! Units with pre_act <= 0 pass no gradient
       grad_hidden = matmul(transpose(V), upstream_grad(:, e))
       where (pre_act <= 0.0_real32) grad_hidden = 0.0_real32

       ! --- d/d(b_u): grad_hidden directly ---
       output(off_bu+1:off_V, 1) = output(off_bu+1:off_V, 1) + grad_hidden

       ! --- d/dU: grad_U(:,col) += grad_hidden * dx(col) ---
       ! One contiguous column of U (H entries) per input component
       do col = 1, d
          lo = off_U + (col-1)*H
          output(lo+1:lo+H, 1) = output(lo+1:lo+H, 1) + &
               grad_hidden * dx(col)
       end do
    end do

    ! Local allocatables are released automatically on return

  end subroutine get_partial_gno_kernel_params_val

get_partial_gno_kernel_params_val Subroutine

Contents

Source Code

pure subroutine get_partial_gno_kernel_params_val(this, upstream_grad, output)

Arguments

Source Code