
I am struggling to write a custom loss function that uses random weights generated according to the class/state predicted from the Softmax output. The desired properties are:

  • The model is a simple feedforward neural network with input dimension 1 and output dimension 6.
  • The activation of the output layer is Softmax, and the actual class/state number is meant to be estimated with Argmax.
  • Note that the training data contain only X (there is no Y).
  • The loss function is defined in terms of random weights (drawn from a Weibull distribution) that are sampled according to the state predicted for each input sample X (see the short sketch right after this list).
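In isolation, the weighting scheme from the last bullet looks like the following NumPy/SciPy sketch (the predicted states here are placeholder values, and only state/class-1 receives a Weibull weight, mirroring the simplification in my minimal example below):

import numpy as np
from scipy.stats import weibull_min

pred_state = np.array([0, 3, 0, 1])        # placeholder: argmax of the softmax output per sample
weights = np.zeros(pred_state.shape, dtype=np.float32)
mask = (pred_state == 0)                   # samples predicted as state/class-1
weights[mask] = weibull_min.rvs(c=2, loc=0, scale=4, size=mask.sum())
loss = np.mean(weights ** 2)               # same form of loss as in the example below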

Below I provide a minimal example to illustrate. To keep things simple, I define the loss function only from the random weights of state/class-1. I get: "ValueError: No gradients provided for any variable: ['dense_41/kernel:0', 'dense_41/bias:0', 'dense_42/kernel:0', 'dense_42/bias:0']."

As pointed out in the post below, I found that argmax is not differentiable and that a softargmax function should help (which I have implemented in the code below). However, I still get the same error. Getting around tf.argmax which is not differentiable
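For reference, here is a small standalone check of that claim, separate from my model code (beta is kept small here just so the gradient is not saturated): the gradient through tf.argmax comes back as None, while the softargmax surrogate from the linked post does return a gradient.

import tensorflow as tf

def softargmax(x, beta=10.0):
    # differentiable approximation of argmax (from the linked post); a very large beta
    # such as 1e10 saturates the softmax and drives this gradient towards zero
    x = tf.convert_to_tensor(x)
    x_range = tf.range(x.shape[-1], dtype=x.dtype)
    return tf.reduce_sum(tf.nn.softmax(x * beta, axis=-1) * x_range, axis=-1)

logits = tf.Variable([[0.10, 0.50, 0.20, 0.05, 0.10, 0.05]], dtype=tf.float32)

with tf.GradientTape(persistent=True) as tape:
    hard_loss = tf.reduce_mean(tf.cast(tf.argmax(logits, axis=-1), tf.float32))
    soft_loss = tf.reduce_mean(softargmax(logits))

print(tape.gradient(hard_loss, logits))  # None: argmax cuts the gradient path
print(tape.gradient(soft_loss, logits))  # a tensor: gradients flow through softargmax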

import sys
import time
from tqdm import tqdm
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from scipy.stats import weibull_min

###############################################################################################
# Generate Dataset
lb  = np.array([2.0])   # Left boundary
ub  = np.array([100.0])  # Right boundary
# Data Points - uniformly distributed
N_r = 50
X_r = np.linspace(lb, ub, N_r)    
###############################################################################################
#Define Model
class DGM:
    # Initialize the class
    def __init__(self, X_r): 
        #Normalize training input data
        self.Xmean, self.Xstd = np.mean(X_r), np.std(X_r)
        X_r = (X_r - self.Xmean) / self.Xstd
        self.X_r = X_r
        #Input and output variable dimensions
        self.X_dim = 1; self.Y_dim = 6
        # Define tensors
        self.X_r_tf = tf.convert_to_tensor(X_r, dtype=tf.float32)
        #Learning rate
        self.LEARNING_RATE=1e-4
        #Feedforward neural network model
        self.modelTest = self.test_model()
    ###############################################
    # Initialize network weights and biases 
    def test_model(self):
        input_shape = self.X_dim
        dimensionality = self.Y_dim
        model = tf.keras.Sequential()
        model.add(layers.Input(shape=(input_shape,)))
        model.add(layers.Dense(64, kernel_initializer='glorot_uniform',bias_initializer='zeros'))
        model.add(layers.Activation('tanh'))
        model.add(layers.Dense(dimensionality))
        model.add(layers.Activation('softmax'))
        return model
    ##############################################        
    def compute_loss(self):
        #Define optimizer (note: a fresh optimizer is created on every call, resetting Adam's state)
        gen_opt = tf.keras.optimizers.Adam(learning_rate=self.LEARNING_RATE, beta_1=0.0, beta_2=0.9)
        with tf.GradientTape() as test_tape:
            ###### calculate loss
            generated_u = self.modelTest(self.X_r_tf, training=True)
            #number of data
            n_data = generated_u.shape[0] 
            #initialize random weights assuming state-1 at all input samples
            wt1 = np.zeros((n_data, 1),dtype=np.float32) #initialize weights
            for b in range(n_data):
                wt1[b] = weibull_min.rvs(c=2, loc=0, scale =4 , size=1)   
            wt1 =  tf.reshape(tf.convert_to_tensor(wt1, dtype=tf.float32),shape=(n_data,1))
            #print('-----------sampling done-----------')  
            #determine the actual state using softargmax
            idst = self.softargmax(generated_u)
            idst = tf.reshape(tf.cast(idst, tf.float32),shape=(n_data,1))
            #index state-1
            id1 = tf.constant(0.,dtype=tf.float32)
            #assign weights if predicted state is state-1
            wt1_final = tf.cast(tf.equal(idst, id1), dtype=tf.float32)*wt1
            #final loss
            test_loss = tf.reduce_mean(tf.square(wt1_final)) 
            #print('-----------test loss calcuated-----------')

        gradients_of_modelTest = test_tape.gradient(test_loss,
                                                    self.modelTest.trainable_variables)

        gen_opt.apply_gradients(zip(gradients_of_modelTest, self.modelTest.trainable_variables))

        return test_loss
    # Reference: Getting around tf.argmax which is not differentiable
    # https://stackoverflow.com/questions/46926809/getting-around-tf-argmax-which-is-not-differentiable
    def softargmax(self, x, beta=1e10):
        x = tf.convert_to_tensor(x)
        x_range = tf.range(x.shape.as_list()[-1], dtype=x.dtype)
        return tf.reduce_sum(tf.nn.softmax(x*beta,axis=1) * x_range, axis=-1)

    ##############################################
    def train(self,training_steps=100):
        train_start_time = time.time()
        for step in tqdm(range(training_steps), desc='Training'):
            start = time.time()
            test_loss = self.compute_loss()          

            if (step + 1) % 10 == 0:
                elapsed_time = time.time() - train_start_time
                sec_per_step = elapsed_time / step
                secs_left = (training_steps - step) * sec_per_step
                tf.print("\nStep # ", step, "/", training_steps,
                         output_stream=sys.stdout)
                tf.print("Current time:", elapsed_time, " time left:",
                         secs_left, output_stream=sys.stdout)
                tf.print("Test Loss: ", test_loss, output_stream=sys.stdout)
###############################################################################################
#Define and train the model
model = DGM(X_r)
model.train(training_steps=100)