import numpy as np
import tensorflow as tf
from tensorflow import keras
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.padding_side = "left"
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token, so reuse EOS to avoid an error

gpt2 = TFGPT2LMHeadModel.from_pretrained('gpt2')
gpt2.trainable = True
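# quick check (illustrative): with trainable=True the LM weights are exposed via
# gpt2.trainable_weights, which is what tape.gradient uses at the bottom of this script
assert len(gpt2.trainable_weights) > 0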

# when generating, we use the logits of the right-most token to predict the next token,
# so the padding should be on the left
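# illustrative check of the left padding: the shorter prompt gets pad (eos) ids on the
# left, so the right-most position always holds a real token
_demo = tokenizer(["short prompt", "a somewhat longer prompt here"],
                  padding=True, return_tensors="tf")
# print(_demo["input_ids"])  # pad id 50256 appears at the start of the shorter row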



num_return_sequences = 1
max_len = get_tokens_len(ds, 0.99)  # helper over my dataset `ds` (defined elsewhere)

cce = tf.keras.losses.CategoricalCrossentropy()   
optimizer = keras.optimizers.Adam(learning_rate=0.0001)


def loss_fn(output_sequences, labels):
    syn_sents = tokenizer.batch_decode(output_sequences, clean_up_tokenization_spaces=True, skip_special_tokens=True)
    syn_sents_pure = []
    for sent, sent_syn in zip(prompts, syn_sents):
        syn_sents_pure.append(sent_syn.replace(sent, '').replace('\n',' ').strip())

    preds = model(np.array(syn_sents_pure))

    assert preds.shape[0] == len(prompts) and preds.shape[1] == num_classes

    label_oht = tf.keras.utils.to_categorical(np.array([label_idx[l] for l in labels]), num_classes=num_classes, dtype='int')
    label_oht_tf = tf.convert_to_tensor(label_oht)
    assert label_oht.shape == preds.shape

    loss_value = cce(label_oht_tf, preds)#.numpy()
    return loss_value
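
# note: loss_fn relies on the global `prompts` (set just below) and on `model`,
# `label_idx` and `num_classes` from my classifier setup (not shown here)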

rows = ds.df_test.sample(5)
prompts = rows['content'].tolist()
labels = rows['label'].tolist()
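# prompts are raw texts from the test dataframe; labels are the corresponding class labels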

with tf.GradientTape() as tape:
    # run the forward pass inside the tape so the operations get recorded
    inputs = tokenizer(prompts, padding='max_length', truncation=True, max_length=max_len, return_tensors="tf")
    output_sequences = gpt2.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=max_len * 2,
        temperature=1.0,
        top_k=0,
        top_p=0.9,
        repetition_penalty=1.0,
        do_sample=True,
        num_return_sequences=num_return_sequences
    )
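    # output_sequences holds the generated token ids (prompt + continuation),
    # with shape (len(prompts) * num_return_sequences, <= max_len * 2)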

    # compute the loss value for this minibatch
    loss_value = loss_fn(output_sequences, labels)  # e.g. <tf.Tensor: shape=(), dtype=float32, numpy=0.062384058>


# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grads = tape.gradient(loss_value, gpt2.trainable_weights)
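
# how I inspect the result (illustrative): every gradient comes back as None
none_grads = [v.name for g, v in zip(grads, gpt2.trainable_weights) if g is None]
print(f"{len(none_grads)} of {len(grads)} gradients are None")  # prints: all of them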

I load the pretrained TFGPT2LMHeadModel from 'gpt2' and use the sentences it synthesizes from the prompts to calculate a loss.

The loss seems fine; it is a scalar tensor, e.g.

`<tf.Tensor: shape=(), dtype=float32, numpy=1.0446845>`

But every element of `grads` is `None`.

Why is that? Any hints?

Thanks.

