我想尝试 tensorflow-hub 中提供的嵌入,具体来说是“通用句子编码器”。我尝试了提供的示例(https://colab.research.google.com/github/tensorflow/hub/blob/master/examples/colab/semantic_similarity_with_tf_hub_universal_encoder.ipynb),效果很好。所以我尝试对“多语言”模型做同样的事情,但每次加载多语言模型时,colab 内核都会失败并重新启动。有什么问题,我该如何解决?
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
import seaborn as sns
import tf_sentencepiece
import sentencepiece
# Import the Universal Sentence Encoder's TF Hub module
embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder-multilingual/1") // This is where the kernel dies.
print("imported model")
# Compute a representation for each message, showing various lengths supported.
word = "코끼리"
sentence = "나는 한국어로 쓰여진 문장이야."
paragraph = (
"동해물과 백두산이 마르고 닳도록. "
"하느님이 보우하사 우리나라 만세~")
messages = [word, sentence, paragraph]
# Reduce logging output.
tf.logging.set_verbosity(tf.logging.ERROR)
with tf.Session() as session:
session.run([tf.global_variables_initializer(), tf.tables_initializer()])
message_embeddings = session.run(embed(messages))
for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):
print("Message: {}".format(messages[i]))
print("Embedding size: {}".format(len(message_embedding)))
message_embedding_snippet = ", ".join(
(str(x) for x in message_embedding[:3]))
print("Embedding: [{}, ...]\n".format(message_embedding_snippet))