Towards Intuition

import ipywidgets as widgets
from ipywidgets import interact

import numpy as np
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 16})

import pandas as pd
from scipy.optimize import minimize
def f1(x):
    """Quadratic test objective f(x) = x^2 (global minimum at x = 0)."""
    return x ** 2
def g1(x):
    """Gradient of f1(x) = x^2.

    The derivative of x^2 is 2x. (The original returned x, which is a
    valid descent direction but only half the true gradient; note this
    doubles the effective stepsize compared with the unscaled version.)
    """
    return 2 * x
def gradient_descent_step(grad, init, stepsize):
    """Move one step downhill from `init` along the negative gradient.

    Returns the updated position ``init - stepsize * grad``.
    """
    update = stepsize * grad
    return init - update
@interact(
    S = widgets.IntSlider(value=1, min=0, max=100, step=1, description = "steps")
)
def plot_gradient_descent(S):
    """Run S gradient-descent steps on f1 and visualize the trajectory.

    Left panel: f1 over [-10, 10] with the visited points overlaid.
    Right panel: loss f1(x) recorded at each step.

    Parameters
    ----------
    S : int
        Number of gradient-descent steps to take (from the slider).
    """
    stepsize = 5 # range from 0 to 2

    fig, axs = plt.subplots(1, 2, figsize = (12, 5))
    x = np.linspace(-10, 10, 301)
    fx = f1(x)
    axs[0].plot(x, fx)

    xstar = 8  # starting point
    xstars = np.zeros(S)
    ystars = np.zeros(S)

    for s in range(S):
        # store current position and loss for plotting
        xstars[s] = xstar
        ystars[s] = f1(xstar)

        # evaluate gradient
        grad = g1(xstar)

        # minimization step
        xstar = gradient_descent_step(grad, xstar, stepsize)

    # reuse the stored losses instead of recomputing f1(xstars)
    axs[0].plot(xstars, ystars, marker = "o", linestyle = "-", c = "orange", markersize = 10)
    # fix: the loop above runs exactly S steps, not S - 1
    axs[0].set_title(f"After {S} steps, x_star = {xstar:.2f}")

    axs[1].plot(np.arange(S), ystars, color = "orange", markersize = 10, marker = "o")
    axs[1].set_xlabel("step")
    axs[1].set_ylabel("loss")
Optimization is Hard

def f2(x):
    """Non-convex quartic objective: f(x) = x^4 - 5x^2 - 3x."""
    quartic = x ** 4
    return quartic - 5 * x ** 2 - 3 * x
def g2(x):
    """Derivative of f2: g(x) = 4x^3 - 10x - 3."""
    cubic_term = 4 * x ** 3
    return cubic_term - 10 * x - 3
@interact(
    S = widgets.IntSlider(value=1, min=0, max=100, step=1, description = "steps")
)
def plot_gradient_descent(S):
    """Run S gradient-descent steps on the non-convex f2 and visualize them.

    Left panel: f2 over [-3, 3] with the visited points overlaid.
    Right panel: loss f2(x) recorded at each step.

    Parameters
    ----------
    S : int
        Number of gradient-descent steps to take (from the slider).
    """
    stepsize = 0.07 # change stepsize

    fig, axs = plt.subplots(1, 2, figsize = (12, 5))
    x = np.linspace(-3, 3, 301)
    fx = f2(x)
    axs[0].plot(x, fx)

    xstar = -2.5 # change initial position, e.g. starting point
    xstars = np.zeros(S)
    ystars = np.zeros(S)

    for s in range(S):
        # store current position and loss for plotting
        xstars[s] = xstar
        ystars[s] = f2(xstar)

        # evaluate gradient
        grad = g2(xstar)

        # minimization step
        xstar = gradient_descent_step(grad, xstar, stepsize)

    # reuse the stored losses instead of recomputing f2(xstars)
    axs[0].plot(xstars, ystars, marker = "o", linestyle = "-", c = "orange", markersize = 10)
    # fix: the loop above runs exactly S steps, not S - 1
    axs[0].set_title(f"After {S} steps, x_star = {xstar:.2f}")

    axs[1].plot(np.arange(S), ystars, color = "orange", markersize = 10, marker = "o")
    axs[1].set_xlabel("step")
    axs[1].set_ylabel("loss")