{:check ["true"]}
import numpy as np
import sympy
import matplotlib.pyplot as pl
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
Consider a function with multiple local minima.
$$ y = ((x-1)(x-4)(x+2))^2 $$
We can observe that it has three local minima.
def f(x):
    """Potential with three local minima: y = ((x-1)(x-4)(x+2))^2.

    Works elementwise on NumPy arrays as well as on scalars.
    """
    roots_product = (x - 1) * (x - 4) * (x + 2)
    return np.power(roots_product, 2)
# Sample the interval [-3, 5] (50 points by default) and plot f;
# `x` is reused by the scatter-overlay cell further down.
x = np.linspace(-3, 5)
pl.plot(x, f(x));
Let's expand the polynomial and figure out the derivative of f.
sympy.diff(sympy.expand('((x-1)*(x-4)*(x+2))^2'))
def f_der(x):
    """Derivative of f, i.e. d/dx ((x-1)(x-4)(x+2))^2, in expanded form."""
    terms = (6 * x**5, -30 * x**4, -12 * x**3, 156 * x**2, -24 * x, -96)
    return sum(terms)
pl.plot(x, f_der(x));
Let's perform gradient descent minimization on f.
def gradientDescent(f, der, x0, alpha, n):
    """Minimize a 1-D function with plain gradient descent.

    Parameters
    ----------
    f : callable
        Objective function of a single variable.
    der : callable
        Derivative of ``f``.
    x0 : float
        Starting point.
    alpha : float
        Step size (learning rate).
    n : int
        Number of iterations.

    Returns
    -------
    (logx, logy) : pair of lists
        The iterate x and f(x) recorded at ~10 evenly spaced
        checkpoints (every iteration when n < 10).
    """
    if n <= 0:
        # Nothing to do; avoids referencing y before assignment below.
        return [], []
    # Integer checkpoint interval: `i % (n / 10)` would do a float
    # modulo in Python 3, which misbehaves for n not divisible by 10.
    interval = max(1, n // 10)
    x = x0
    logx = []
    logy = []
    for i in range(n):
        x = x - alpha * der(x)  # step against the gradient
        y = f(x)
        if i % interval == 0:
            print("[{:4d}] f({:.2f})={:.4f}".format(i, x, y))
            logx.append(x)
            logy.append(y)
    print(f"After {n} iterations, f({x})={y}")
    return logx, logy
# Run 1000 descent steps from x0 = 3 with a small step size, then
# overlay the recorded iterates (red dots) on the curve of f.
logx, logy = gradientDescent(f, f_der, 3, 1e-5, 1000)
pl.figure(figsize=(10,10))
pl.plot(x, f(x))
pl.scatter(logx, logy, color='red')
Definition
A potential field is a function that maps vectors to scalars.
Let's start with just 2D.
$g(x,y) = (x-1)^2 + (y-2)^2$
The minimum occurs at $(x, y) = (1, 2)$.
def g(x, y):
    """2-D potential field: a paraboloid whose single minimum is at (1, 2)."""
    return (x - 1) * (x - 1) + (y - 2) * (y - 2)
# Visualize g as a 2-D contour map on a fine 100x100 grid.
xs = np.linspace(-4, 4, 100)
ys = np.linspace(-4, 4, 100)
xx, yy = np.meshgrid(xs, ys)
z = g(xx, yy)  # potential evaluated on the whole grid
pl.figure(figsize=(6,6))
pl.contour(xx, yy, z, levels=100);
# Surface plot of the same potential.
# Fix: fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and
# removed in 3.6; add_subplot(projection='3d') is the supported spelling.
fig = pl.figure(figsize=(10,10))
ax = fig.add_subplot(projection='3d')
ax.plot_surface(xx, yy, z, cmap=cm.coolwarm);
Let's review the definition of gradient of potential field.
$$\nabla g = \left[ \begin{array}{l} \frac{\partial g}{\partial x} \\ \frac{\partial g}{\partial y} \end{array}\right]$$Note: $\nabla g$ is a vector field.
Definition:
A vector field is a function that maps a vector to a vector.
$\nabla g$ is the direction to take to increase the potential $g(x,y)$.
def g_grad_x(x, y):
    """Partial derivative of g with respect to x (independent of y)."""
    return (x - 1) * 2
def g_grad_y(x, y):
    """Partial derivative of g with respect to y (independent of x)."""
    return (y - 2) * 2
# Re-sample on a coarse 10x10 grid so the quiver arrows are readable,
# and draw the gradient vector field of g. Note this rebinds xx, yy, z
# (and defines u, v), which the final plotting cell below reuses.
xs = np.linspace(-4, 4, 10)
ys = np.linspace(-4, 4, 10)
xx, yy = np.meshgrid(xs, ys)
z = g(xx, yy)
u = g_grad_x(xx, yy)  # x-component of the gradient at each grid point
v = g_grad_y(xx, yy)  # y-component of the gradient at each grid point
pl.figure(figsize=(6,6))
pl.quiver(xx, yy, u, v);
To reach a minimum, we want to move against the local gradient.
def gradientDescent2D(g, g_grad_x, g_grad_y, x0, y0, alpha, N):
    """Minimize a 2-D potential field with plain gradient descent.

    Parameters
    ----------
    g : callable
        Potential field g(x, y) -> scalar.
    g_grad_x, g_grad_y : callable
        Partial derivatives of ``g`` with respect to x and y.
    x0, y0 : float
        Starting point.
    alpha : float
        Step size (learning rate).
    N : int
        Number of iterations.

    Returns
    -------
    (logx, logy, logz) : lists
        The iterate (x, y) and g(x, y) recorded at ~10 evenly spaced
        checkpoints (every iteration when N < 10).
    """
    # Integer checkpoint interval: `i % (N / 10)` would do a float
    # modulo in Python 3, which misbehaves for N not divisible by 10.
    interval = max(1, N // 10)
    x = x0
    y = y0
    logx, logy, logz = [], [], []
    for i in range(N):
        dx = g_grad_x(x, y)
        dy = g_grad_y(x, y)
        x = x - alpha * dx  # step against the gradient
        y = y - alpha * dy
        z = g(x, y)
        if i % interval == 0:
            logx.append(x)
            logy.append(y)
            logz.append(z)
            print("[{:3d}] g({:.2f}, {:.2f}) = {:.4f}".format(i, x, y, z))
    return logx, logy, logz
# Run 2-D descent from the origin and overlay the trajectory (red dots)
# on the contour map and gradient field.
logx, logy, logz = gradientDescent2D(g, g_grad_x, g_grad_y, 0, 0, 1e-3, 1000)
pl.figure(figsize=(10,10));
# NOTE(review): xx, yy, z here are the coarse 10x10 quiver grid defined
# above, so this contour is blocky — consider re-evaluating g on the
# 100-point grid for a smoother picture.
pl.contour(xx, yy, z, levels=20);
pl.quiver(xx, yy, u, v);
pl.scatter(logx, logy, color='r');