I am trying to implement a Gaussian fit using TensorFlow Probability's Nelder-Mead optimizer, tfp.optimizer.nelder_mead_minimize(). It does not converge, whereas scipy.optimize.minimize() delivers a good result in less than 1 second of computation time. I am probably doing something wrong, but I can't figure out what. Can anyone help me?
I am using:
python 3.7.3
tensorflow-probability 0.8
tensorflow 2.0
Here is my code:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

# Define the gaussian model: y = 1/(sigma * sqrt(2 pi)) * exp(-(x-mu)²/(2*sigma²))
pi = np.pi

def model(x, theta):
    y = 1/(theta[1]*tf.sqrt(2*pi)) * tf.exp(-(x-theta[0])**2 / (2*theta[1]**2))
    return y

# Define the loss (least mean square)
def loss_function(theta, y, x, callback=False, n_iterations=1):
    global n_epochs_cb
    loss = tf.losses.mean_squared_error(y, model(x, theta))
    if callback:
        if n_epochs_cb % (n_iterations/10.) == 0:
            print('{0:4d} {1:} {2: 3.6f}'.format(n_epochs_cb, theta, loss))
        n_epochs_cb = n_epochs_cb + 1
    return loss

# Generate some data
theta_true = [3, 2]
X = np.arange(-10, 10, 0.5)
Y = model(X, theta_true)
# fig, ax = plt.subplots(1, 1, figsize=[20, 10])
# ax.scatter(X, Y, label='data', alpha=0.5)

# initialize parameters
theta = tf.constant(tf.random.uniform([2], 0, 10), dtype=tf.float32, name='theta')
n_iterations = 100
n_epochs_cb = 1

# minimization
print('{0:4s} {1:9s} {2:9s}'.format('Iter', 'theta', 'loss'))
optim_results = tfp.optimizer.nelder_mead_minimize(
    lambda theta: loss_function(theta, X, Y, True, n_iterations),
    initial_vertex=theta,
    func_tolerance=1e-8,
    position_tolerance=1e-8,
    max_iterations=n_iterations)
print("theta_true", theta_true)
print("theta_est", optim_results.position.numpy())
print("converged:", optim_results.converged.numpy())
print("number of function evaluations", optim_results.num_objective_evaluations.numpy())
print("number of iterations", optim_results.num_iterations.numpy())
print("objective value", optim_results.objective_value.numpy())
The optimization stops after about 50 iterations and returns:
Iter theta loss
10 [0.1448533 6.7525005] 33.408031
20 [-0.2385819 28.76061 ] 33.382130
30 [ -4.1879644 260.84622 ] 33.375771
40 [ -34.722183 2053.5083 ] 33.375099
50 [ -418.6432 24589.836 ] 33.375008
theta_true [3, 2]
theta_est [ -488.44122 28687.352 ]
converged: True
number of function evaluations 55
number of iterations 35
objective value 33.375008
I ran the same problem with minimize() from scipy.optimize using the 'Nelder-Mead' method, and it gives:
Iter theta loss
10 [4.61612335 4.40795762] 0.007583
20 [3.19502416 2.09290338] 0.001023
30 [3.01845636 1.99504269] 0.000091
40 [2.99843397 2.00065615] 0.000010
Optimization terminated successfully.
Current function value: 0.000010
Iterations: 44
Function evaluations: 96
computation time 0.046 seconds
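For reference, the SciPy comparison was essentially the following (a minimal sketch, since I did not paste that code above; the NumPy model and loss mirror the TF versions, and the per-iteration printing is omitted):

import numpy as np
from scipy.optimize import minimize

pi = np.pi

# Same gaussian model as above, in plain NumPy
def model_np(x, theta):
    return 1/(theta[1]*np.sqrt(2*pi)) * np.exp(-(x-theta[0])**2 / (2*theta[1]**2))

# Least mean square loss, matching the TF version
def loss_np(theta, y, x):
    return np.mean((y - model_np(x, theta))**2)

theta_true = [3, 2]
X = np.arange(-10, 10, 0.5)
Y = model_np(X, theta_true)

theta0 = np.random.uniform(0, 10, 2)
res = minimize(loss_np, theta0, args=(Y, X), method='Nelder-Mead',
               options={'xatol': 1e-8, 'fatol': 1e-8})
print(res.x, res.fun, res.nit, res.nfev)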
I would expect the Nelder-Mead optimizer from tensorflow-probability and the Nelder-Mead optimizer from scipy.optimize to perform comparably. What am I doing wrong?
EDIT: Found the mistake: in the call to loss_function the y and x data arguments were swapped (passed as X, Y instead of Y, X), and X needed an explicit float32 dtype. The following code now converges:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

# Define the gaussian model: y = 1/(sigma * sqrt(2 pi)) * exp(-(x-mu)²/(2*sigma²))
pi = np.pi

def model(x, theta):
    y = 1/(theta[1]*tf.sqrt(2*pi)) * tf.exp(-(x-theta[0])**2 / (2*theta[1]**2))
    return y

# Define the loss (least mean square)
def loss_function(theta, y, x, callback=False, n_iterations=1):
    global n_epochs_cb
    loss = tf.losses.mean_squared_error(y, model(x, theta))
    if callback:
        if n_epochs_cb % (n_iterations/10.) == 0:
            print('{0:4d} {1:} {2: 3.6f}'.format(n_epochs_cb, theta, loss))
        n_epochs_cb = n_epochs_cb + 1
    return loss

# Generate some data
theta_true = [3, 2]
X = np.arange(-10, 10, 0.5, dtype=np.float32)
Y = model(X, theta_true)
# fig, ax = plt.subplots(1, 1, figsize=[20, 10])
# ax.scatter(X, Y, label='data', alpha=0.5)

# initialize parameters
theta = tf.constant(tf.random.uniform([2], 0, 10), dtype=tf.float32, name='theta')
print("theta_true", theta_true)
print("theta_init", theta.numpy())
n_iterations = 100
n_epochs_cb = 1

# minimization
print('{0:4s} {1:9s} {2:9s}'.format('Iter', 'theta', 'loss'))
optim_results = tfp.optimizer.nelder_mead_minimize(
    lambda theta: loss_function(theta, Y, X, True, n_iterations),
    initial_vertex=theta,
    func_tolerance=1e-8,
    position_tolerance=1e-8,
    max_iterations=n_iterations)
print("theta_est", optim_results.position.numpy())
print("converged:", optim_results.converged.numpy())
print("number of function evaluations", optim_results.num_objective_evaluations.numpy())
print("number of iterations", optim_results.num_iterations.numpy())
print("objective value", optim_results.objective_value.numpy())