这是一个由两部分组成的问题,非常复杂。
首先,我想计算数据框 "ind_scores" 中每个样本与数据框 "centroids" 中其所属组的质心之间的 n 维欧几里得距离。
然后,我想选出距离各自组质心最近的 3 个样本,并将它们保存到一个新的数据框中,其中包含"样本名称"、"组"和"到质心的距离"这三项信息。
以下是数据示例:
# Example per-sample scores: one row per sample (40 rows in total).
# The sample names are stored as the row names, not as a regular column.
ind_scores <- data.frame(row.names = c("OP2413iiiaMOU","OP2413iiibMOU","OP2413iiicMOU","OP2645ii_aPOR","OP2645ii_bPOR","OP2645ii_cPOR","OP2645ii_dPOR","OP2645ii_ePOR","OP3088i__aPOR","OP5043___aWAT","OP5043___bWAT","OP5044___aMOU","OP5044___bMOU","OP5044___cMOU","OP5046___aWAT","OP5046___bWAT","OP5046___cWAT","OP5046___dWAT","OP5046___eWAT","OP5047___aPHA","OP5047___bPHA","OP5048___bPHA","OP5048___cPHA","OP5048___dPHA","OP5048___ePHA","OP5048___fPHA","OP5048___gPHA","OP5048___hPHA","OP5049___aWAT","OP5049___bWAT","OP5051DNAaCOM","OP5051DNAbCOM","OP5051DNAcCOM","OP5052DNAaWAT","OP5053DNAaPHA","OP5053DNAbPHA","OP5053DNAcPHA","OP5054DNAaMOU","OP5054DNAbMOU","OP5054DNAcMOU"),
# Group label of each sample, given as a character string ("1" to "5"),
# in the same order as the row names above.
group = c("4","4","4","1","1","1","1","1","3","3","3","5","5","5","2","5","2","2","5","3","3","3","3","1","3","3","3","3","2","2","4","5","4","2","3","3","3","5","5","5"),
# CV.1 to CV.4: numeric coordinates of each sample, one value per row
# (presumably canonical variate scores -- TODO confirm with the author).
CV.1 = c(-13.3864612433581,-12.8079930877268,-12.8078461023615,11.609290941109,10.5489837203281,10.8802079446603,11.7559827821839,10.769027609963,2.93788199576291,5.14343682437333,1.1768471575429,-3.59878541566711,-3.69656648297924,-3.13205394000296,-1.88190759998412,-3.50181277277038,0.563858206656491,-1.38629942623866,-3.73771209413208,3.40039117982473,2.86962877144321,3.80869463338469,4.15722705333298,9.08529455175736,3.15497802125988,2.42193314853044,0.600699372070624,4.14515087614032,-3.3599436881205,-1.8893406509868,-13.355031250023,-4.10118631444206,-11.4911993949333,-1.55841778422586,2.91834267480086,1.58762181687645,3.08125993208779,-3.84248479288043,-3.60800082570682,-3.47369634755007),
CV.2 = c(-5.98931418061097,-6.48685652483353,-6.48781938591041,-5.4121748521578,-4.56051914391762,-5.14772881585026,-4.3883054106957,-3.06298578319138,0.25688954313487,1.01459325674394,1.47381593062751,5.11285501685872,6.32219277017476,4.93757903863915,-1.98974199849122,6.8029453586845,-4.47482073821288,-2.89353901685366,6.19654462202962,1.44791941276988,2.01950206487354,3.29347544821835,1.70411388918498,-3.36842394773708,0.843537649290457,1.53904192617335,-0.0653393231022099,2.43481086719558,-2.28081054006986,-1.12101221091068,-5.74678650527647,2.81164429296665,-4.7739502651084,-0.836323550526183,1.21550795042252,1.3943021883996,1.4814166592311,5.83324212843683,5.74898742272061,5.20153475667944),
CV.3 = c(-1.98030009996666,-0.130982057250324,-0.13182806033636,4.66419380929057,5.76073945060135,4.68132496125842,4.76343610149589,4.14550671815003,-4.32639082067268,-4.24665489024982,-4.41960026466873,3.48306980151309,3.33978102573513,5.7630709271421,1.72213262278476,3.4138699327986,-0.214011687254588,-1.35717946591182,3.99742433050098,-4.11899265115508,-4.850265219848,-4.56241597162798,-5.1673124571133,3.88620294769555,-7.55945071289283,-5.18624310325486,-2.64740221288213,-3.34585676732483,-0.146912983782168,0.183282683148834,0.341803164827804,3.08878325423758,0.402559648490399,-0.589462854225432,-4.66295564242554,-4.70902036477095,-3.15037329091412,4.46721009678144,4.19323467451728,5.20598542755799),
CV.4 = c(-1.85773720384766,-3.29816018270707,-3.29805035723744,-1.0463680864694,-0.164642808251456,-1.88434766843655,-2.76184052196793,-1.69491772471098,0.0194432918943446,0.900426089523736,-0.581953934607345,-0.230042890025999,-1.79667524325622,-2.45893275735924,6.71016957191989,1.8888359729478,5.48587185602468,7.45260127587355,-0.447573770298677,-1.61748546155154,-2.01415972868345,-1.50135791552696,-0.439840157186184,-1.26569596255966,-1.04297110114946,-1.59978271452128,-0.471298592990895,-0.466524983137062,6.36590517153234,6.62852590954231,-3.04695209017556,-0.936146169909344,-2.4145719914164,5.10804058988218,-0.0744344020096521,-1.17738342385673,-1.67635978290671,-1.05954691377259,-0.0467102661118772,1.81264507750015))
# Group centroids: one row per group, with the group label as a character
# string ("1" to "5") and one numeric coordinate per CV.1 .. CV.4 column.
centroids <- data.frame(
  # Labels "1", "2", "3", "4", "5", one per centroid row.
  group = as.character(1:5),
  CV.1 = c(
    10.7747979250003, -1.58534182381657, 2.95743524695937,
    -12.7697062156805, -3.63247766512568
  ),
  CV.2 = c(
    -4.32335632559164, -2.26604134251075, 1.43239910451168,
    -5.89694537234795, 5.44083615635448
  ),
  CV.3 = c(
    4.65023399808197, -0.0670252808734024, -4.49663816927149,
    -0.299749480847027, 4.1058254967538
  ),
  CV.4 = c(
    -1.469635462066, 6.29185239579583, -0.838834486907799,
    -2.78309436507683, -0.363794106698444
  )
)
提前致谢!祝好。迪翁。