我正在尝试编写一个功能来检查每个查询的最高分数是多少。查询是搜索词,手套词向量使用产品描述进行训练。下面列出了我用于训练词向量和特征本身 (all.gloveterms) 的代码。我收到错误:word_vectors[query, , drop = FALSE] 中的错误:下标越界,在尝试执行命令后:all.gloveterms(query_product$search_term[2]),其中搜索词应返回“角度括号”。我很难找出如何解决这个错误,并且想知道是否有人可以提供帮助。
#select product descriptions
descriptions <- product_descriptions[,2]
descriptions <- removePunctuation(descriptions)
descriptions <- tolower(descriptions)
it = itoken(descriptions, tolower, word_tokenizer, n_chunks = 10)
#find vocab from product descriptions
vocab = create_vocabulary(it)
dim(vocab)
vocab = prune_vocabulary(vocab, term_count_min = 10, doc_proportion_max = 0.8,doc_proportion_min = 0.001, vocab_term_max = 20000)
dim(vocab)
vectorizer = vocab_vectorizer(vocab)
# create term co-occurrence matrix with window-size 5
tcm = create_tcm(it, vectorizer, skip_grams_window = 5L)
# train the word vectors
glove = GlobalVectors$new(rank = 50, x_max = 10)
wv_main = glove$fit_transform(tcm, n_iter = 50, convergence_tol = 0.01)
wv_context = glove$components
word_vectors = wv_main + t(wv_context)
all.gloveterms <- function (queries)
{
n <- length(queries)
scores <- c(0)
for(i in 1:n){
query <- queries[i]
wordvector = word_vectors[query, , drop = FALSE]
cosine = sim2(x = word_vectors, y = wordvector, method = "cosine", norm = "l2")
scores <- append(scores, max(cosine))
}
y <- as.integer(0)
score <- sum(unlist(scores))/n
feature[i] <- if (length(score)) score else y
}