I am solving the simple ordinary differential equation (ODE)
y'(x) = 2 * x
y(0) = 1
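For reference, integrating the right-hand side directly gives the exact solution (this is the referenceSolution used for the plot at the end):
y(x) = x**2 + 1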
I am using TensorFlow with the following code:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
alphaAdam = 0.01 # --- Learning rate. float >= 0.
betaAdam = 0.99 # --- Exponential decay rate for the 1st moment estimates. float, 0 < beta < 1. Generally close to 1.
epsilonAdam = 1e-1 # --- Fuzz factor. float >= 0.
numIter = 20000 # --- Number of gradient descent iterations
skipIter = 100 # --- Number of iterations to skip for presenting the results
f0 = 1 # --- Boundary condition
xEpsilon = np.sqrt(np.finfo(np.float32).eps)
N = 20 # --- Number of sampling points
x = tf.cast(tf.reshape(tf.linspace(0, 1, N), (N, 1)), dtype = tf.float32) # --- Independent variable
numNeuronsInputLayer = 1 # --- Number of neurons of the input layer
numNeuronsLayer1 = 32 # --- Number of neurons of the first layer
numNeuronsLayer2 = 32 # --- Number of neurons of the second layer
numNeuronsLayer3 = 32 # --- Number of neurons of the third layer
numNeuronsOutputLayer = 1 # --- Number of neurons of the output layer
weights = {
    'h1': tf.Variable(tf.random.normal([numNeuronsInputLayer, numNeuronsLayer1])),
    'h2': tf.Variable(tf.random.normal([numNeuronsLayer1, numNeuronsLayer2])),
    'h3': tf.Variable(tf.random.normal([numNeuronsLayer2, numNeuronsLayer3])),
    'out': tf.Variable(tf.random.normal([numNeuronsLayer3, numNeuronsOutputLayer]))
}
biases = {
    'b1': tf.Variable(tf.random.normal([numNeuronsLayer1])),
    'b2': tf.Variable(tf.random.normal([numNeuronsLayer2])),
    'b3': tf.Variable(tf.random.normal([numNeuronsLayer3])),
    'out': tf.Variable(tf.random.normal([numNeuronsOutputLayer]))
}
def model(x):
    x = np.array([[[x]]], dtype='float32')
    layer1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer1 = tf.nn.sigmoid(layer1)
    layer2 = tf.add(tf.matmul(layer1, weights['h2']), biases['b2'])
    layer2 = tf.nn.sigmoid(layer2)
    layer3 = tf.add(tf.matmul(layer2, weights['h3']), biases['b3'])
    layer3 = tf.nn.sigmoid(layer3)
    output = tf.matmul(layer3, weights['out']) + biases['out']
    return tf.nn.sigmoid(output)
# --- Auxiliary function
def g(x):
    return x * model(x) + f0
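# --- Note: by construction g(0) = 0 * model(0) + f0 = f0, so the boundary condition y(0) = 1 holds for any network output.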
# --- ODE function
def f(x):
    return 2 * x
def costFunction(x):
    with tf.GradientTape(persistent = True) as tape:
        tape.watch(x)
        y = g(x)
    #dNN = tape.gradient(y, x)
    dNN = (g(x + xEpsilon) - g(x)) / xEpsilon
    return tf.reduce_sum(tf.square(tf.abs(dNN - f(x))))
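# --- costFunction above is the squared ODE residual dNN - f(x), i.e. y'(x) - 2 * x, summed over the N sampling points.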
optimizer = tf.keras.optimizers.Adam(learning_rate = alphaAdam, beta_1 = betaAdam, epsilon = epsilonAdam)
def optimizationStep():
    with tf.GradientTape() as tape:
        costFunctionValue = costFunction(x)
    trainable_variables = list(weights.values()) + list(biases.values())
    gradients = tape.gradient(costFunctionValue, trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainable_variables))
for i in range(numIter):
    optimizationStep()
    if i % skipIter == 0:
        print("Cost function: %f " % (costFunction(x)))
def referenceSolution(x):
    return x**2 + 1
figure(figsize = (10, 10))
yref = referenceSolution(x)
y = g(x)
plt.plot(x, yref, label = "Reference")
plt.plot(x, y, label = "Approximation")
plt.legend(loc = 2, prop = {'size': 20})
plt.show()
If I compute the derivative y'(x) by finite differences, i.e., with the line
dNN = (g(x + xEpsilon) - g(x)) / xEpsilon
the code runs fine. However, if I use automatic differentiation, i.e., the lines
with tf.GradientTape(persistent = True) as tape:
    tape.watch(x)
    y = g(x)
dNN = tape.gradient(y, x)
the code does not work.
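For comparison, here is a minimal standalone sketch of the tape pattern I expected to work (plain TF ops only; xTest, yTest and tf.square are just illustrative stand-ins, not part of my code):
import tensorflow as tf
xTest = tf.linspace(0.0, 1.0, 5)
with tf.GradientTape() as tape:
    tape.watch(xTest)                # --- xTest is a plain tensor, so it must be watched explicitly
    yTest = tf.square(xTest)         # --- y = x**2, built from TF ops only
dyTest = tape.gradient(yTest, xTest) # --- Gives 2 * x: [0., 0.5, 1., 1.5, 2.]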
What exactly am I doing wrong?