Line Fitting (video)
In this section, we look at the problem of modeling and parameter estimation.
We will show how gradient descent can serve as a general-purpose method for selecting the parameters of a given model.
import numpy as np
import matplotlib.pyplot as pl
import pandas
Generate some random data.
x_data = np.linspace(0, 10, 100)
y_data = 7*x_data + 20 + np.random.randn(100) * 5
pl.plot(x_data, y_data, '.')
pl.xlim(0, 10)
pl.ylim(0, 100);
df = pandas.DataFrame({"x": x_data, "y": y_data})
df.to_csv('line_fitting_data.csv', index=False)
We can speculate that the data follow a straight line, so we choose a simple model:
$$ y = ax + b $$
But what should we choose as $a$ and $b$?
Define the parameters as:
$$ c = \left[\begin{array}{l} a \\ b \end{array}\right] $$
def f(x, c):
    return c[0]*x + c[1]
y_pred = f(x_data, [14, -10])
pl.plot(x_data, y_data, '.')
pl.plot(x_data, y_pred, '--')
pl.xlim(0, 10)
pl.ylim(0, 150);
We assess the quality of a parameter by looking at the error incurred by the model prediction.
def err(x_data, y_data, c):
    y_pred = f(x_data, c)
    return (y_pred - y_data)
c = [14, -10]
e = err(x_data, y_data, c)
w = (x_data[1] - x_data[0])/3
pl.bar(x_data, e, width=w);
Now, we can compute a loss function that assesses the quality of the model parameter $c$.
$$ L = \sum_i (e_i)^2 $$
Note:
$L(c)$ is a potential field function.
$\nabla L(c)$ is a vector field function.
def loss(c):
    e = err(x_data, y_data, c)
    L = np.sum(e ** 2)
    return L
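Since $L(c)$ is a potential field over the parameter space, we can probe its shape directly before optimizing. The sketch below evaluates the loss on a coarse grid of $(a, b)$ values; it regenerates the data locally (with an assumed seed, so the block runs on its own) and the grid minimum should land near the true parameters $(7, 20)$.

```python
import numpy as np

# Regenerate the synthetic data locally (seed is an assumption for reproducibility).
rng = np.random.default_rng(0)
x_data = np.linspace(0, 10, 100)
y_data = 7*x_data + 20 + rng.standard_normal(100) * 5

def loss(c):
    e = c[0]*x_data + c[1] - y_data
    return np.sum(e ** 2)

# Evaluate the loss on a coarse grid of (a, b) values.
a_vals = np.linspace(0, 14, 15)
b_vals = np.linspace(0, 40, 15)
L_grid = np.array([[loss([a, b]) for b in b_vals] for a in a_vals])

# Locate the grid point with the smallest loss: it should sit near (7, 20).
i, j = np.unravel_index(L_grid.argmin(), L_grid.shape)
print(a_vals[i], b_vals[j])
```

Plotting `L_grid` as a contour map would show the familiar elongated bowl of a quadratic loss, which is what makes gradient descent applicable here.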
def loss_grad(c):
    e = err(x_data, y_data, c)
    grad_a = np.sum(2*e*x_data)
    grad_b = np.sum(2*e)
    return np.array([grad_a, grad_b])
grad = loss_grad(c)
grad
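Hand-derived gradients are easy to get wrong, so it is worth checking the analytic gradient against central finite differences. This is a sketch, assuming the same data and loss as above (regenerated locally with an assumed seed so the block is self-contained):

```python
import numpy as np

# Local copy of the data (seed is an assumption).
rng = np.random.default_rng(0)
x_data = np.linspace(0, 10, 100)
y_data = 7*x_data + 20 + rng.standard_normal(100) * 5

def loss(c):
    e = c[0]*x_data + c[1] - y_data
    return np.sum(e ** 2)

def loss_grad(c):
    e = c[0]*x_data + c[1] - y_data
    return np.array([np.sum(2*e*x_data), np.sum(2*e)])

def numeric_grad(c, h=1e-6):
    # Central difference in each coordinate: (L(c+h) - L(c-h)) / (2h).
    g = np.zeros(2)
    for k in range(2):
        d = np.zeros(2)
        d[k] = h
        g[k] = (loss(c + d) - loss(c - d)) / (2*h)
    return g

c = np.array([14.0, -10.0])
print(loss_grad(c))
print(numeric_grad(c))
```

For a quadratic loss the two should agree to many digits; a mismatch usually means a sign or factor error in the derivation.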
Now we perform gradient descent to minimize the loss.
def gradientDescent(loss, loss_grad, c0, alpha, n):
    logL = []
    logC = []
    c = np.array(c0)
    for i in range(n):
        c = c - alpha * loss_grad(c)
        L = loss(c)
        if (i % (n // 10)) == 0:  # report progress ten times over the run
            print(c.round(decimals=2), L)
        logL.append(L)
        logC.append(c)
    return logL, logC
logL, logC = gradientDescent(loss, loss_grad, [10, 10], 1e-4, 2000)
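For a linear model the least-squares problem also has a closed-form solution, so we can sanity-check the gradient descent result. The sketch below (self-contained, with an assumed seed for its local copy of the data) reruns the same update rule and compares the result with `np.polyfit`, which returns the coefficients $[a, b]$ minimizing the same sum of squared errors:

```python
import numpy as np

# Local copy of the data (seed is an assumption).
rng = np.random.default_rng(0)
x_data = np.linspace(0, 10, 100)
y_data = 7*x_data + 20 + rng.standard_normal(100) * 5

def loss_grad(c):
    e = c[0]*x_data + c[1] - y_data
    return np.array([np.sum(2*e*x_data), np.sum(2*e)])

# Same update as gradientDescent above: 2000 steps, alpha = 1e-4.
c = np.array([10.0, 10.0])
for _ in range(2000):
    c = c - 1e-4 * loss_grad(c)

# Closed-form least-squares fit of a degree-1 polynomial.
a_cf, b_cf = np.polyfit(x_data, y_data, 1)
print(c, [a_cf, b_cf])
```

The two answers should agree closely; gradient descent earns its keep when the model is no longer linear in its parameters and no closed form exists.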
We can visualize both the line against the data, and the loss function over the iterations.
pl.figure(figsize=(6, 12))
pl.subplot(2, 1, 1)
pl.plot(x_data, y_data, '.')
y_pred = f(x_data, logC[-1])
pl.plot(x_data, y_pred, '--')
pl.title('Line fitting after 2000 iterations', fontsize=15)
pl.subplot(2, 1, 2)
pl.plot(logL)
pl.title('Loss function over the iterations', fontsize=15);