WPILibC++ 2027.0.0-alpha-4
Loading...
Searching...
No Matches
gradient_expression_graph.hpp
Go to the documentation of this file.
1// Copyright (c) Sleipnir contributors
2
3#pragma once
4
#include <ranges>
#include <utility>

#include <Eigen/SparseCore>
#include <gch/small_vector.hpp>

// NOTE(review): the following project includes were dropped by extraction and
// are reconstructed from the Doxygen cross-references — confirm exact paths.
#include "sleipnir/autodiff/expression.hpp"
#include "sleipnir/autodiff/expression_graph.hpp"
#include "sleipnir/autodiff/variable.hpp"
#include "sleipnir/autodiff/variable_matrix.hpp"
#include "sleipnir/util/assert.hpp"
#include "sleipnir/util/empty.hpp"
17namespace slp::detail {
18
19/// This class is an adapter type that performs value updates of an expression
20/// graph, generates a gradient tree, or appends gradient triplets for creating
21/// a sparse matrix of gradients.
22///
23/// @tparam Scalar Scalar type.
24template <typename Scalar>
26 public:
27 /// Generates the gradient graph for the given expression.
28 ///
29 /// @param root The root node of the expression.
31 : m_top_list{topological_sort(root.expr)} {
32 for (const auto& node : m_top_list) {
33 m_col_list.emplace_back(node->col);
34 }
35 }
36
37 /// Update the values of all nodes in this graph based on the values of their
38 /// dependent nodes.
39 void update_values() { detail::update_values(m_top_list); }
40
41 /// Returns the variable's gradient tree.
42 ///
43 /// This function lazily allocates variables, so elements of the returned
44 /// VariableMatrix will be empty if the corresponding element of wrt had no
45 /// adjoint. Ensure Variable::expr isn't nullptr before calling member
46 /// functions.
47 ///
48 /// @param wrt Variables with respect to which to compute the gradient.
49 /// @return The variable's gradient tree.
51 const VariableMatrix<Scalar>& wrt) const {
52 slp_assert(wrt.cols() == 1);
53
54 // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
55 // for background on reverse accumulation automatic differentiation.
56
57 if (m_top_list.empty()) {
59 }
60
61 // Set root node's adjoint to 1 since df/df is 1
62 m_top_list[0]->adjoint_expr = constant_ptr(Scalar(1));
63
64 // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
65 // multiplied by dy/dx. If there are multiple "paths" from the root node to
66 // variable; the variable's adjoint is the sum of each path's adjoint
67 // contribution.
68 for (auto& node : m_top_list) {
69 auto& lhs = node->args[0];
70 auto& rhs = node->args[1];
71
72 if (lhs != nullptr) {
73 if (rhs != nullptr) {
74 // Binary operator
75 lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
76 rhs->adjoint_expr += node->grad_expr_r(lhs, rhs, node->adjoint_expr);
77 } else {
78 // Unary operator
79 lhs->adjoint_expr += node->grad_expr_l(lhs, rhs, node->adjoint_expr);
80 }
81 }
82 }
83
84 // Move gradient tree to return value
86 for (int row = 0; row < grad.rows(); ++row) {
87 grad[row] = Variable{std::move(wrt[row].expr->adjoint_expr)};
88 }
89
90 // Unlink adjoints to avoid circular references between them and their
91 // parent expressions. This ensures all expressions are returned to the free
92 // list.
93 for (auto& node : m_top_list) {
94 node->adjoint_expr = nullptr;
95 }
96
97 return grad;
98 }
99
100 /// Updates the adjoints in the expression graph (computes the gradient) then
101 /// appends the adjoints of wrt to the sparse matrix triplets.
102 ///
103 /// @param triplets The sparse matrix triplets.
104 /// @param row The row of wrt.
105 /// @param wrt Vector of variables with respect to which to compute the
106 /// Jacobian.
107 void append_triplets(gch::small_vector<Eigen::Triplet<Scalar>>& triplets,
108 int row, const VariableMatrix<Scalar>& wrt) const {
109 slp_assert(wrt.cols() == 1);
110
111 // Read docs/algorithms.md#Reverse_accumulation_automatic_differentiation
112 // for background on reverse accumulation automatic differentiation.
113
114 // If wrt has fewer nodes than graph, zero wrt's adjoints
115 if (static_cast<size_t>(wrt.rows()) < m_top_list.size()) {
116 for (const auto& elem : wrt) {
117 elem.expr->adjoint = Scalar(0);
118 }
119 }
120
121 if (m_top_list.empty()) {
122 return;
123 }
124
125 // Set root node's adjoint to 1 since df/df is 1
126 m_top_list[0]->adjoint = Scalar(1);
127
128 // Zero the rest of the adjoints
129 for (auto& node : m_top_list | std::views::drop(1)) {
130 node->adjoint = Scalar(0);
131 }
132
133 // df/dx = (df/dy)(dy/dx). The adjoint of x is equal to the adjoint of y
134 // multiplied by dy/dx. If there are multiple "paths" from the root node to
135 // variable; the variable's adjoint is the sum of each path's adjoint
136 // contribution.
137 for (const auto& node : m_top_list) {
138 auto& lhs = node->args[0];
139 auto& rhs = node->args[1];
140
141 if (lhs != nullptr) {
142 if (rhs != nullptr) {
143 // Binary operator
144 lhs->adjoint += node->grad_l(lhs->val, rhs->val, node->adjoint);
145 rhs->adjoint += node->grad_r(lhs->val, rhs->val, node->adjoint);
146 } else {
147 // Unary operator
148 lhs->adjoint += node->grad_l(lhs->val, Scalar(0), node->adjoint);
149 }
150 }
151 }
152
153 // If wrt has fewer nodes than graph, iterate over wrt
154 if (static_cast<size_t>(wrt.rows()) < m_top_list.size()) {
155 for (int col = 0; col < wrt.rows(); ++col) {
156 const auto& node = wrt[col].expr;
157
158 // Append adjoints of wrt to sparse matrix triplets
159 if (node->adjoint != Scalar(0)) {
160 triplets.emplace_back(row, col, node->adjoint);
161 }
162 }
163 } else {
164 for (size_t i = 0; i < m_top_list.size(); ++i) {
165 const auto& col = m_col_list[i];
166 const auto& node = m_top_list[i];
167
168 // Append adjoints of wrt to sparse matrix triplets
169 if (col != -1 && node->adjoint != Scalar(0)) {
170 triplets.emplace_back(row, col, node->adjoint);
171 }
172 }
173 }
174 }
175
176 private:
177 // Topological sort of graph from parent to child
179
180 // List that maps nodes to their respective column
181 gch::small_vector<int> m_col_list;
182};
183
184} // namespace slp::detail
#define slp_assert(condition)
Abort in C++.
Definition assert.hpp:25
An autodiff variable pointing to an expression node.
Definition variable.hpp:47
A matrix of autodiff variables.
Definition variable_matrix.hpp:33
int rows() const
Returns the number of rows in the matrix.
Definition variable_matrix.hpp:972
int cols() const
Returns the number of columns in the matrix.
Definition variable_matrix.hpp:977
GradientExpressionGraph(const Variable< Scalar > &root)
Generates the gradient graph for the given expression.
Definition gradient_expression_graph.hpp:30
void append_triplets(gch::small_vector< Eigen::Triplet< Scalar > > &triplets, int row, const VariableMatrix< Scalar > &wrt) const
Updates the adjoints in the expression graph (computes the gradient) then appends the adjoints of wrt...
Definition gradient_expression_graph.hpp:107
void update_values()
Update the values of all nodes in this graph based on the values of their dependent nodes.
Definition gradient_expression_graph.hpp:39
VariableMatrix< Scalar > generate_tree(const VariableMatrix< Scalar > &wrt) const
Returns the variable's gradient tree.
Definition gradient_expression_graph.hpp:50
wpi::util::SmallVector< T > small_vector
Definition small_vector.hpp:10
Definition expression_graph.hpp:11
static constexpr empty_t empty
Designates an uninitialized VariableMatrix.
Definition empty.hpp:11
void update_values(const gch::small_vector< Expression< Scalar > * > &list)
Update the values of all nodes in this graph based on the values of their dependent nodes.
Definition expression_graph.hpp:77
ExpressionPtr< Scalar > constant_ptr(Scalar value)
Creates an intrusive shared pointer to a constant expression.
Definition expression.hpp:417
gch::small_vector< Expression< Scalar > * > topological_sort(const ExpressionPtr< Scalar > &root)
Generate a topological sort of an expression graph from parent to child.
Definition expression_graph.hpp:20