Years ago I was exploring automatic differentiation in C++. Here's the code I came up with.

#include <cmath>
#include <functional>
#include <limits>
#include <iostream>
#include <vector>

namespace agrad {

typedef std::vector<double> adj_t;
typedef std::function<void(adj_t&)> chain_t;
std::vector<chain_t> stack_;

std::size_t next_idx_ = 0;
inline std::size_t next_idx() {
  return next_idx_++;
}

struct var {
  double val_;
  int idx_;
  var(double val) : val_(val), idx_(next_idx()) { }
};

inline var operator+(const var& x1, const var& x2) {
  var y(x1.val_ + x2.val_);
  stack_.emplace_back([=](adj_t& adj) {
      adj[x1.idx_] += adj[y.idx_];
      adj[x2.idx_] += adj[y.idx_];
    });
  return y;
}
inline var operator+(const var& x1, double x2) {
  var y(x1.val_ + x2);
  stack_.emplace_back([=](adj_t& adj) {
      adj[x1.idx_] += adj[y.idx_];
    });
  return y;
}
inline var operator+(double x1, const var& x2) {
  var y(x1 + x2.val_);
  stack_.emplace_back([=](adj_t& adj) {
      adj[x2.idx_] += adj[y.idx_];
    });
  return y;
}

inline var operator*(const var& x1, const var& x2) {
  var y(x1.val_ * x2.val_);
  stack_.emplace_back([=](adj_t& adj) {
      adj[x1.idx_] += x2.val_ * adj[y.idx_];
      adj[x2.idx_] += x1.val_ * adj[y.idx_];
    });
  return y;
}
inline var operator*(const var& x1, double x2) {
  var y(x1.val_ * x2);
  stack_.emplace_back([=](adj_t& adj) {
      adj[x1.idx_] += x2 * adj[y.idx_];
    });
  return y;
}
inline var operator*(double x1, const var& x2) {
  var y(x1 * x2.val_);
  stack_.emplace_back([=](adj_t& adj) {
      adj[x2.idx_] += x1 * adj[y.idx_];
    });
  return y;
}

inline var exp(const var& x) {
  var y(std::exp(x.val_));
  stack_.emplace_back([=](adj_t& adj) {
      adj[x.idx_] += y.val_ * adj[y.idx_];
    });
  return y;
}

inline var log(const var& x) {
  var y(std::log(x.val_));
  stack_.emplace_back([=](adj_t& adj) {
      adj[x.idx_] += adj[y.idx_] / x.val_;
    });
  return y;
}

inline var pow(const var& x1, double x2) {
  var y(std::pow(x1.val_, x2));
  stack_.emplace_back([=](adj_t& adj) {
      adj[x1.idx_] += adj[y.idx_] * x2 * y.val_ / x1.val_;
    });
  return y;
}

std::vector<double> grad(const var& y, const std::vector<var>& x) {
  std::vector<double> adj(y.idx_ + 1, 0.0);
  adj[y.idx_] = 1;
  for (auto chain_f = stack_.crbegin(); chain_f != stack_.crend(); ++chain_f)
    (*chain_f)(adj);
  std::vector<double> dy_dx(x.size());
  for (std::size_t i = 0; i < x.size(); ++i)
    dy_dx[i] = adj[x[i].idx_];
  return dy_dx;
}
} // namespace agrad

int main() {
  agrad::stack_.clear();
  using agrad::var;
  using agrad::log;
  using agrad::exp;
  using agrad::pow;
  var x1 = 3;
  std::vector<var> x = { x1 };
  var y = log(pow(x1, 2));      // f: R -> R function to differentiate
  std::vector<double> dy_dx = agrad::grad(y, x);
  std::cout << "y = " << y.val_ << std::endl;
  for (std::size_t i = 0; i < x.size(); ++i)
    std::cout << "dy / dx[" << (i + 1) << "] = " << dy_dx[i] << std::endl;
  return 0;
}