Years ago I was exploring automatic differentiation in C++. Here's the code I came up with.
#include <cmath>
#include <functional>
#include <limits>
#include <iostream>
#include <vector>
namespace agrad {
typedef std::vector<double> adj_t;
typedef std::function<void(adj_t&)> chain_t;
std::vector<chain_t> stack_;
std::size_t next_idx_ = 0;
inline std::size_t next_idx() {
return next_idx_++;
}
struct var {
double val_;
int idx_;
var(double val) : val_(val), idx_(next_idx()) { }
};
inline var operator+(const var& x1, const var& x2) {
var y(x1.val_ + x2.val_);
stack_.emplace_back([=](adj_t& adj) {
adj[x1.idx_] += adj[y.idx_];
adj[x2.idx_] += adj[y.idx_];
});
return y;
}
inline var operator+(const var& x1, double x2) {
var y(x1.val_ + x2);
stack_.emplace_back([=](adj_t& adj) {
adj[x1.idx_] += adj[y.idx_];
});
return y;
}
inline var operator+(double x1, const var& x2) {
var y(x1 + x2.val_);
stack_.emplace_back([=](adj_t& adj) {
adj[x2.idx_] += adj[y.idx_];
});
return y;
}
inline var operator*(const var& x1, const var& x2) {
var y(x1.val_ * x2.val_);
stack_.emplace_back([=](adj_t& adj) {
adj[x1.idx_] += x2.val_ * adj[y.idx_];
adj[x2.idx_] += x1.val_ * adj[y.idx_];
});
return y;
}
inline var operator*(const var& x1, double x2) {
var y(x1.val_ * x2);
stack_.emplace_back([=](adj_t& adj) {
adj[x1.idx_] += x2 * adj[y.idx_];
});
return y;
}
inline var operator*(double x1, const var& x2) {
var y(x1 * x2.val_);
stack_.emplace_back([=](adj_t& adj) {
adj[x2.idx_] += x1 * adj[y.idx_];
});
return y;
}
inline var exp(const var& x) {
var y(std::exp(x.val_));
stack_.emplace_back([=](adj_t& adj) {
adj[x.idx_] += y.val_ * adj[y.idx_];
});
return y;
}
inline var log(const var& x) {
var y(std::log(x.val_));
stack_.emplace_back([=](adj_t& adj) {
adj[x.idx_] += adj[y.idx_] / x.val_;
});
return y;
}
inline var pow(const var& x1, double x2) {
var y(std::pow(x1.val_, x2));
stack_.emplace_back([=](adj_t& adj) {
adj[x1.idx_] += adj[y.idx_] * x2 * y.val_ / x1.val_;
});
return y;
}
std::vector<double> grad(const var& y, const std::vector<var>& x) {
std::vector<double> adj(y.idx_ + 1, 0.0);
adj[y.idx_] = 1;
for (auto chain_f = stack_.crbegin(); chain_f != stack_.crend(); ++chain_f)
(*chain_f)(adj);
std::vector<double> dy_dx(x.size());
for (std::size_t i = 0; i < x.size(); ++i)
dy_dx[i] = adj[x[i].idx_];
return dy_dx;
}
} // namespace agrad
int main() {
agrad::stack_.clear();
using agrad::var;
using agrad::log;
using agrad::exp;
using agrad::pow;
var x1 = 3;
std::vector<var> x = { x1 };
var y = log(pow(x1, 2)); // f: R -> R function to differentiate
std::vector<double> dy_dx = agrad::grad(y, x);
std::cout << "y = " << y.val_ << std::endl;
for (std::size_t i = 0; i < x.size(); ++i)
std::cout << "dy / dx[" << (i + 1) << "] = " << dy_dx[i] << std::endl;
return 0;
}