0

我想用 fviz_nbclust 函数估计合适的聚类数量,但它一直报下面这个错误:

do_one(nmeth) 中的错误:外部函数调用中的 NA/NaN/Inf (arg 1)

此外警告消息:

1:在 stats::dist(x) 中:强制引入的 NA

2:在 storage.mode(x) <- "double" :强制引入的 NA

我已经对数据集的所有列都运行过 sum(is.na(stand_numeric_data$variable)),每个变量的结果都是 0,所以我认为数据中没有 NA 值。有什么建议吗?我是编程新手,任何建议我都将不胜感激。

# Question script: load the movie metadata, keep five columns, filter rows,
# standardise, and ask factoextra for an elbow ("wss") plot.

# Cells equal to the string "True" are read as NA.
# NOTE(review): `na.string` is not the full argument name; it presumably
# reaches read.table's `na.strings` through partial matching -- confirm.
movies_data <- read.csv("movies_metadata.csv", na.string = "True")

# Columns are picked by position. In the dput() sample of movies_data these
# positions are revenue, runtime, vote_average, vote_count and title, so the
# fifth selected column is character, not numeric.
# NOTE(review): `%>%` and select() need dplyr (or tidyverse) attached; no
# library() call for it is visible in this snippet.
only_numeric <- movies_data %>% select(16, 17, 23, 24, 21) #subset of columns 

# Each subset() keeps only the rows whose value in the named column consists
# solely of digits.
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$revenue))

only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$runtime))

# NOTE(review): '^\\d+$' does not match values containing a decimal point, so
# rows with a fractional vote_average (e.g. 7.7) are dropped here as well --
# confirm this is intended.
only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$vote_average))

only_numeric <- subset(only_numeric, grepl('^\\d+$', only_numeric$vote_count))

library(caret) #standardization

# c(1:4, 5) covers all five selected columns, i.e. title is included.
# NOTE(review): preProcess presumably leaves non-numeric columns untouched;
# that matches summary(stand_numeric_data) still listing title as character.
preproc1 <- preProcess(only_numeric[,c(1:4,5)], method=c("center", "scale"))

stand_numeric_data <- predict(preproc1, only_numeric[,c(1:4,5)])

# Counts NA values in the revenue column only.
sum(is.na(stand_numeric_data$revenue))

library(factoextra) #estimate the actual number of clusters 

# NOTE(review): stand_numeric_data still holds the character title column.
# Converting that column to numeric is the likely source of the "NAs
# introduced by coercion" warnings and of the NA/NaN/Inf error reported for
# this call. Passing only the four numeric columns, e.g.
# stand_numeric_data[, 1:4], should avoid it -- confirm.
fviz_nbclust(stand_numeric_data, kmeans, method = "wss")

do_one(nmeth) 中的错误:外部函数调用中的 NA/NaN/Inf (arg 1)

此外警告消息:

1:在 stats::dist(x) 中:强制引入的 NA

2:在 storage.mode(x) <- "double" :强制引入的 NA

dput(head(movies_data, 5))
structure(list(adult = c("False", "False", "False", "False", 
"False"), belongs_to_collection = c("{'id': 10194, 'name': 'Toy Story Collection', 'poster_path': '/7G9915LfUQ2lVfwMEEhDsn3kT4B.jpg', 'backdrop_path': '/9FBwqcd9IRruEDUrTdcaafOMKUq.jpg'}", 
"", "{'id': 119050, 'name': 'Grumpy Old Men Collection', 'poster_path': '/nLvUdqgPgm3F85NMCii9gVFUcet.jpg', 'backdrop_path': '/hypTnLot2z8wpFS7qwsQHW1uV8u.jpg'}", 
"", "{'id': 96871, 'name': 'Father of the Bride Collection', 'poster_path': '/nts4iOmNnq7GNicycMJ9pSAn204.jpg', 'backdrop_path': '/7qwE57OVZmMJChBpLEbJEmzUydk.jpg'}"
), budget = c("30000000", "65000000", "0", "16000000", "0"), 
    genres = c("[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]", 
    "[{'id': 12, 'name': 'Adventure'}, {'id': 14, 'name': 'Fantasy'}, {'id': 10751, 'name': 'Family'}]", 
    "[{'id': 10749, 'name': 'Romance'}, {'id': 35, 'name': 'Comedy'}]", 
    "[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'name': 'Drama'}, {'id': 10749, 'name': 'Romance'}]", 
    "[{'id': 35, 'name': 'Comedy'}]"), homepage = c("http://toystory.disney.com/toy-story", 
    "", "", "", ""), id = c("862", "8844", "15602", "31357", 
    "11862"), imdb_id = c("tt0114709", "tt0113497", "tt0113228", 
    "tt0114885", "tt0113041"), original_language = c("en", "en", 
    "en", "en", "en"), original_title = c("Toy Story", "Jumanji", 
    "Grumpier Old Men", "Waiting to Exhale", "Father of the Bride Part II"
    ), overview = c("Led by Woody, Andy's toys live happily in his room until Andy's birthday brings Buzz Lightyear onto the scene. Afraid of losing his place in Andy's heart, Woody plots against Buzz. But when circumstances separate Buzz and Woody from their owner, the duo eventually learns to put aside their differences.", 
    "When siblings Judy and Peter discover an enchanted board game that opens the door to a magical world, they unwittingly invite Alan -- an adult who's been trapped inside the game for 26 years -- into their living room. Alan's only hope for freedom is to finish the game, which proves risky as all three find themselves running from giant rhinoceroses, evil monkeys and other terrifying creatures.", 
    "A family wedding reignites the ancient feud between next-door neighbors and fishing buddies John and Max. Meanwhile, a sultry Italian divorcée opens a restaurant at the local bait shop, alarming the locals who worry she'll scare the fish away. But she's less interested in seafood than she is in cooking up a hot time with Max.", 
    "Cheated on, mistreated and stepped on, the women are holding their breath, waiting for the elusive \"good man\" to break a string of less-than-stellar lovers. Friends and confidants Vannah, Bernie, Glo and Robin talk it all out, determined to find a better way to breathe.", 
    "Just when George Banks has recovered from his daughter's wedding, he receives the news that she's pregnant ... and that George's wife, Nina, is expecting too. He was planning on selling their home, but that's a plan that -- like George -- will have to change with the arrival of both a grandchild and a kid of his own."
    ), popularity = c("21.946943", "17.015539", "11.7129", "3.859495", 
    "8.387519"), poster_path = c("/rhIRbceoE9lR4veEXuwCC2wARtG.jpg", 
    "/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg", "/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg", 
    "/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg", "/e64sOI48hQXyru7naBFyssKFxVd.jpg"
    ), production_companies = c("[{'name': 'Pixar Animation Studios', 'id': 3}]", 
    "[{'name': 'TriStar Pictures', 'id': 559}, {'name': 'Teitler Film', 'id': 2550}, {'name': 'Interscope Communications', 'id': 10201}]", 
    "[{'name': 'Warner Bros.', 'id': 6194}, {'name': 'Lancaster Gate', 'id': 19464}]", 
    "[{'name': 'Twentieth Century Fox Film Corporation', 'id': 306}]", 
    "[{'name': 'Sandollar Productions', 'id': 5842}, {'name': 'Touchstone Pictures', 'id': 9195}]"
    ), production_countries = c("[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 
    "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 
    "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 
    "[{'iso_3166_1': 'US', 'name': 'United States of America'}]", 
    "[{'iso_3166_1': 'US', 'name': 'United States of America'}]"
    ), release_date = c("1995-10-30", "1995-12-15", "1995-12-22", 
    "1995-12-22", "1995-02-10"), revenue = c(373554033, 262797249, 
    0, 81452156, 76578911), runtime = c(81, 104, 101, 127, 106
    ), spoken_languages = c("[{'iso_639_1': 'en', 'name': 'English'}]", 
    "[{'iso_639_1': 'en', 'name': 'English'}, {'iso_639_1': 'fr', 'name': 'Français'}]", 
    "[{'iso_639_1': 'en', 'name': 'English'}]", "[{'iso_639_1': 'en', 'name': 'English'}]", 
    "[{'iso_639_1': 'en', 'name': 'English'}]"), status = c("Released", 
    "Released", "Released", "Released", "Released"), tagline = c("", 
    "Roll the dice and unleash the excitement!", "Still Yelling. Still Fighting. Still Ready for Love.", 
    "Friends are the people who let you be yourself... and never let you forget it.", 
    "Just When His World Is Back To Normal... He's In For The Surprise Of His Life!"
    ), title = c("Toy Story", "Jumanji", "Grumpier Old Men", 
    "Waiting to Exhale", "Father of the Bride Part II"), video = c("False", 
    "False", "False", "False", "False"), vote_average = c(7.7, 
    6.9, 6.5, 6.1, 5.7), vote_count = c(5415L, 2413L, 92L, 34L, 
    173L)), row.names = c(NA, 5L), class = "data.frame")
summary(stand_numeric_data)

revenue           runtime          vote_average       vote_count     
 Min.   :-0.1114   Min.   :-2.10206   Min.   :-1.5192   Min.   :-0.1414  
 1st Qu.:-0.1114   1st Qu.:-0.20831   1st Qu.:-1.5192   1st Qu.:-0.1381  
 Median :-0.1114   Median : 0.08303   Median : 0.1963   Median :-0.1381  
 Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000  
 3rd Qu.:-0.1114   3rd Qu.: 0.37438   3rd Qu.: 0.8825   3rd Qu.:-0.1248  
 Max.   :28.9583   Max.   :20.35581   Max.   : 1.9118   Max.   :29.3968  
    title          
 Length:11406      
 Class :character  
 Mode  :character  
4

2 回答 2

0

我可以使用iris数据集重现您的错误

library(tidyverse)
library(factoextra)

# Inspect the column types, then look for missing values per column
str(iris)
summary(iris)

# Elbow plot on iris with column 5 (Species) dropped
fviz_nbclust(iris[, -5], kmeans, method = "wss")

# Replace every Petal.Length equal to 1.4 with NA to mimic missing data
df <- mutate(iris, Petal.Length = na_if(Petal.Length, 1.4))

# Run summary again to see the NAs
summary(df)

# Clustering this data frame gives the reported error
# (df is passed whole, so it still includes the Species column)
fviz_nbclust(df, kmeans, method = "wss")

do_one(nmeth) 中的错误:外部函数调用中的 NA/NaN/Inf (arg 1) 此外:警告消息:1:在 stats::dist(x) 中:强制引入的 NA 2:在 storage.mode(x) <- "double" : 强制引入的 NA

# Keep only the rows that have no NA in any column
keep_rows <- complete.cases(df)
df1 <- df[keep_rows, ]

# Check the summary again: the NAs are gone
summary(df1)

# Centre and scale columns 1 to 4
library(caret)

numeric_cols <- df1[, 1:4]
preproc1 <- preProcess(numeric_cols, method = c("center", "scale"))
stand_numeric_data <- predict(preproc1, numeric_cols)

# fviz_nbclust now runs without error
fviz_nbclust(stand_numeric_data, kmeans, method = "wss")
于 2020-09-15T12:34:58.137 回答
0

在您提供合适的数据集之前,可以根据您的用例调整下面的代码,以确定列中非数字值所在的位置。其中 a 代表数据框中的某一列。

# Flag, element by element, which values of a column can be read as numbers.
library(Hmisc)

# Mixing NA, numbers and a string in c() yields a character vector, which
# stands in for a column that was read in as text.
a <- c(NA, NA, 2, 3, "aa")

# vapply() pins the result to one logical per element, so the output type
# cannot change silently the way it can with sapply(). The result is a
# logical vector named by the input values: TRUE where the value is numeric.
vapply(a, all.is.numeric, logical(1))

输出:

 <NA>  <NA>     2     3    aa 
FALSE FALSE  TRUE  TRUE FALSE 

您可以在此处阅读有关 all.is.numeric 函数的说明:http://math.furman.edu/~dcs/courses/math47/R/library/Hmisc/html/all.is.numeric.html

于 2020-09-15T12:53:58.380 回答