{:check ["true"]}
import numpy as np
import sympy
import matplotlib.pyplot as pl
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
Consider a function with multiple local minima.
$$ y = ((x-1)(x-4)(x+2))^2 $$
We can observe that it has three local minima.
def f(x):
    """Potential with three local minima: y = ((x-1)(x-4)(x+2))^2.

    Works elementwise on NumPy arrays as well as on scalars.
    """
    roots_product = (x - 1) * (x - 4) * (x + 2)
    return np.power(roots_product, 2)
# Sample the interval [-3, 5] (50 points by default) and plot f;
# `x` is reused by the scatter-overlay cell further down.
x = np.linspace(-3, 5)
pl.plot(x, f(x));
Let's expand the polynomial and figure out the derivative of f.
sympy.diff(sympy.expand('((x-1)*(x-4)*(x+2))^2'))
def f_der(x):
    """Derivative of f, i.e. d/dx ((x-1)(x-4)(x+2))^2, in expanded form."""
    terms = (6 * x**5, -30 * x**4, -12 * x**3, 156 * x**2, -24 * x, -96)
    return sum(terms)
pl.plot(x, f_der(x));
Let's perform gradient descent minimization on f.
def gradientDescent(f, der, x0, alpha, n):
    """Minimize a 1-D function with plain gradient descent.

    Parameters
    ----------
    f : callable
        Objective function of a single variable.
    der : callable
        Derivative of ``f``.
    x0 : float
        Starting point.
    alpha : float
        Step size (learning rate).
    n : int
        Number of iterations.

    Returns
    -------
    (logx, logy) : pair of lists
        The iterate x and f(x) recorded at ~10 evenly spaced
        checkpoints (every iteration when n < 10).
    """
    if n <= 0:
        # Nothing to do; avoids referencing y before assignment below.
        return [], []
    # Integer checkpoint interval: `i % (n / 10)` would do a float
    # modulo in Python 3, which misbehaves for n not divisible by 10.
    interval = max(1, n // 10)
    x = x0
    logx = []
    logy = []
    for i in range(n):
        x = x - alpha * der(x)  # step against the gradient
        y = f(x)
        if i % interval == 0:
            print("[{:4d}] f({:.2f})={:.4f}".format(i, x, y))
            logx.append(x)
            logy.append(y)
    print(f"After {n} iterations, f({x})={y}")
    return logx, logy
# Run 1000 descent steps from x0 = 3 with a small step size, then
# overlay the recorded iterates (red dots) on the curve of f.
logx, logy = gradientDescent(f, f_der, 3, 1e-5, 1000)
pl.figure(figsize=(10,10))
pl.plot(x, f(x))
pl.scatter(logx, logy, color='red')
Definition
A potential field is a function that maps vectors to scalars.
Let's start with just 2D.
$g(x,y) = (x-1)^2 + (y-2)^2$
The minimum occurs at $(x, y) = (1, 2)$.
def g(x, y):
    """2-D potential field: a paraboloid whose single minimum is at (1, 2)."""
    return (x - 1) * (x - 1) + (y - 2) * (y - 2)
# Visualize g as a 2-D contour map on a fine 100x100 grid.
xs = np.linspace(-4, 4, 100)
ys = np.linspace(-4, 4, 100)
xx, yy = np.meshgrid(xs, ys)
z = g(xx, yy)  # potential evaluated on the whole grid
pl.figure(figsize=(6,6))
pl.contour(xx, yy, z, levels=100);
# Surface plot of the same potential.
# Fix: fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and
# removed in 3.6; add_subplot(projection='3d') is the supported spelling.
fig = pl.figure(figsize=(10,10))
ax = fig.add_subplot(projection='3d')
ax.plot_surface(xx, yy, z, cmap=cm.coolwarm);
Let's review the definition of gradient of potential field.
$$\nabla g = \left[ \begin{array}{l} \frac{\partial g}{\partial x} \\ \frac{\partial g}{\partial y} \end{array}\right]$$Note: $\nabla g$ is a vector field.
Definition:
A vector field is a function that maps a vector to a vector.
$\nabla g$ is the direction to take to increase the potential $g(x,y)$.
def g_grad_x(x, y):
    """Partial derivative of g with respect to x (independent of y)."""
    return (x - 1) * 2
def g_grad_y(x, y):
    """Partial derivative of g with respect to y (independent of x)."""
    return (y - 2) * 2
# Re-sample on a coarse 10x10 grid so the quiver arrows are readable,
# and draw the gradient vector field of g. Note this rebinds xx, yy, z
# (and defines u, v), which the final plotting cell below reuses.
xs = np.linspace(-4, 4, 10)
ys = np.linspace(-4, 4, 10)
xx, yy = np.meshgrid(xs, ys)
z = g(xx, yy)
u = g_grad_x(xx, yy)  # x-component of the gradient at each grid point
v = g_grad_y(xx, yy)  # y-component of the gradient at each grid point
pl.figure(figsize=(6,6))
pl.quiver(xx, yy, u, v);
To reach a minimum, we want to move against the local gradient.
def gradientDescent2D(g, g_grad_x, g_grad_y, x0, y0, alpha, N):
    """Minimize a 2-D potential field with plain gradient descent.

    Parameters
    ----------
    g : callable
        Potential field g(x, y) -> scalar.
    g_grad_x, g_grad_y : callable
        Partial derivatives of ``g`` with respect to x and y.
    x0, y0 : float
        Starting point.
    alpha : float
        Step size (learning rate).
    N : int
        Number of iterations.

    Returns
    -------
    (logx, logy, logz) : lists
        The iterate (x, y) and g(x, y) recorded at ~10 evenly spaced
        checkpoints (every iteration when N < 10).
    """
    # Integer checkpoint interval: `i % (N / 10)` would do a float
    # modulo in Python 3, which misbehaves for N not divisible by 10.
    interval = max(1, N // 10)
    x = x0
    y = y0
    logx, logy, logz = [], [], []
    for i in range(N):
        dx = g_grad_x(x, y)
        dy = g_grad_y(x, y)
        x = x - alpha * dx  # step against the gradient
        y = y - alpha * dy
        z = g(x, y)
        if i % interval == 0:
            logx.append(x)
            logy.append(y)
            logz.append(z)
            print("[{:3d}] g({:.2f}, {:.2f}) = {:.4f}".format(i, x, y, z))
    return logx, logy, logz
# Run 2-D descent from the origin and overlay the trajectory (red dots)
# on the contour map and gradient field.
logx, logy, logz = gradientDescent2D(g, g_grad_x, g_grad_y, 0, 0, 1e-3, 1000)
pl.figure(figsize=(10,10));
# NOTE(review): xx, yy, z here are the coarse 10x10 quiver grid defined
# above, so this contour is blocky — consider re-evaluating g on the
# 100-point grid for a smoother picture.
pl.contour(xx, yy, z, levels=20);
pl.quiver(xx, yy, u, v);
pl.scatter(logx, logy, color='r');