0

我的数据框 `df` 中的 `Consonant`(辅音)列有两个水平:"Geminate" 有 250 条,"Singleton" 有 214 条。由于 Geminate 的条目更多,我怎样才能删除多余的 Geminate 条目,使其数量与 Singleton 相等?期望的输出是:只保留那些在 Singleton 中有对应条目的 Geminate 条目。`Filename`(文件名)列的前两个字符表示说话者,其后是被测试的目标词。

我的数据框有 13 列、464 行,因此只能以图片形式贴出(我知道不鼓励这样做)。在此先感谢您的帮助。此致问候。

我的数据框的片段

编辑: dput() 的输出在提供的链接中(因为我无法在此处粘贴它,超过了字符限制)。 dput() 输出

4

1 回答 1

0

基础 R 解决方案:

# NOTE: `dogri_vot` (the sample data frame from the data section of this
# answer) must already exist in the session before this snippet is run.

# Running index within each Consonant level: the k-th row of a level gets k.
# The template passed to ave() is seq_along(Consonant) (integer) rather than
# the character column itself, so the counter stays integer instead of being
# coerced to character; split() below then orders the groups numerically
# (1, 2, 3, ...) rather than lexicographically ("1", "10", "11", ...).
# seq_along() is also well-defined for a level that occurs only once.
# ctr => integer vector
ctr <- with(
  dogri_vot,
  ave(seq_along(Consonant), Consonant, FUN = seq_along)
)

# Number of distinct Consonant levels: n_vals => integer scalar
n_vals <- length(unique(dogri_vot$Consonant))

# Split the rows by running index and keep only the indices that are present
# in every Consonant level; rows beyond the size of the smallest level are
# dropped, which leaves the same number of rows per level.
# sbst_df => data.frame
sbst_df <- data.frame(
  do.call(
    rbind,
    Filter(function(x) nrow(x) == n_vals, split(dogri_vot, ctr))
  ),
  row.names = NULL
)

数据:

# Sample data used by the solution: a 75-row data.frame written out as a
# dput()-style structure() call.
# Columns:
#   V1        - row index, 1:75
#   Filename  - TextGrid file name of the form "<prefix>_<word>.TextGrid";
#               the prefixes seen here are "AK", "DS" and "MS"
#   Speaker   - integer speaker code (1, 2 or 3)
#   Consonant - "Geminate" or "Singleton"
#   VOT.dn    - numeric; in this sample the values equal VOT * 100
#   VOT       - numeric (units are not stated here; presumably seconds -
#               TODO confirm)
#   xsampa    - constant "A" in this sample
#   Place     - place-of-articulation label ("Velar", "Retroflex", "Bilabial",
#               "Dental/alveolar", "Palatal")
#   Manner    - "Stop" or "Palatal"
#   Voicing   - "Voiced" or "Voiceless"
# NOTE(review): Manner holds "Palatal" on a few rows, which is also a Place
# label - confirm against the original data whether that is intended.
dogri_vot <- structure(list(V1 = 1:75, Filename = c("AK_baagge.TextGrid", 
"AK_baagi.TextGrid", "AK_bagga.TextGrid", "AK_buute.TextGrid", 
"AK_buutte.TextGrid", "AK_chaakki.TextGrid", "AK_chape.TextGrid", 
"AK_chappe.TextGrid", "AK_fati.TextGrid", "AK_fatti.TextGrid", 
"AK_gada.TextGrid", "AK_gadda.TextGrid", "AK_jaaddi.TextGrid", 
"AK_jaadi.TextGrid", "AK_katha.TextGrid", "AK_kute.TextGrid", 
"AK_kutte.TextGrid", "AK_raajegi.TextGrid", "AK_raajjegi.TextGrid", 
"AK_sada.TextGrid", "AK_saja.TextGrid", "AK_sajja.TextGrid", 
"AK_saka.TextGrid", "AK_sakka.TextGrid", "AK_tapa.TextGrid", 
"AK_tappa.TextGrid", "AK_topa.TextGrid", "AK_toppa.TextGrid", 
"DS_baagge.TextGrid", "DS_baagi.TextGrid", "DS_baga.TextGrid", 
"DS_bagga.TextGrid", "DS_buute.TextGrid", "DS_buutte.TextGrid", 
"DS_chaakki.TextGrid", "DS_chaaku.TextGrid", "DS_chape.TextGrid", 
"DS_chappe.TextGrid", "DS_fatti.TextGrid", "DS_gada.TextGrid", 
"DS_gadda.TextGrid", "DS_jaaddi.TextGrid", "DS_jaadi.TextGrid", 
"DS_katha.TextGrid", "DS_kattha.TextGrid", "DS_kute.TextGrid", 
"DS_kutte.TextGrid", "DS_sada.TextGrid", "DS_sadda.TextGrid", 
"DS_saka.TextGrid", "DS_sakka.TextGrid", "DS_tapa.TextGrid", 
"DS_tappa.TextGrid", "DS_topa.TextGrid", "DS_toppa.TextGrid", 
"MS_baagge.TextGrid", "MS_baagi.TextGrid", "MS_baga.TextGrid", 
"MS_bagga.TextGrid", "MS_buute.TextGrid", "MS_buutte.TextGrid", 
"MS_chaakki.TextGrid", "MS_chaaku.TextGrid", "MS_chape.TextGrid", 
"MS_chappe.TextGrid", "MS_fati.TextGrid", "MS_fatti.TextGrid", 
"MS_gadda.TextGrid", "MS_jaaddi.TextGrid", "MS_jaadi.TextGrid", 
"MS_katha.TextGrid", "MS_kattha.TextGrid", "MS_kute.TextGrid", 
"MS_kutte.TextGrid", "MS_raajegi.TextGrid"), Speaker = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), Consonant = c("Geminate", 
"Singleton", "Geminate", "Singleton", "Geminate", "Geminate", 
"Singleton", "Geminate", "Singleton", "Geminate", "Singleton", 
"Geminate", "Geminate", "Singleton", "Singleton", "Singleton", 
"Geminate", "Singleton", "Geminate", "Singleton", "Singleton", 
"Geminate", "Singleton", "Geminate", "Singleton", "Geminate", 
"Singleton", "Geminate", "Geminate", "Singleton", "Singleton", 
"Geminate", "Singleton", "Geminate", "Geminate", "Singleton", 
"Singleton", "Geminate", "Geminate", "Singleton", "Geminate", 
"Geminate", "Singleton", "Singleton", "Geminate", "Singleton", 
"Geminate", "Singleton", "Geminate", "Singleton", "Geminate", 
"Singleton", "Geminate", "Singleton", "Geminate", "Geminate", 
"Singleton", "Singleton", "Geminate", "Singleton", "Geminate", 
"Geminate", "Singleton", "Singleton", "Geminate", "Singleton", 
"Geminate", "Geminate", "Geminate", "Singleton", "Singleton", 
"Geminate", "Singleton", "Geminate", "Singleton"), VOT.dn = c(2.6688227, 
5.3873637, 3.4600502, 1.5246856, 1.2237697, 5.7330285, 1.192124, 
1.732151, 1.7444548, 1.7133051, 1.4753655, 0.9625276, 1.6766487, 
2.7441848, 7.37298, 2.7602911, 4.3993992, 5.9935975, 7.010274, 
2.5616875, 5.5951297, 3.9080006, 2.2465927, 3.2889385, 1.0705113, 
1.2967303, 1.5396249, 0.9897444, 3.2001336, 4.6939881, 2.6724766, 
1.9066545, 2.0054339, 1.8826236, 6.0663448, 5.1022693, 2.3050381, 
0.9959057, 1.4804389, 1.6683009, 2.4245469, 3.164039, 2.0648408, 
6.2447475, 6.65083, 2.4059111, 1.7843229, 1.0648602, 1.0570399, 
2.3740635, 1.9817196, 2.0791699, 1.1780412, 1.5373779, 1.250764, 
1.5723396, 1.1632927, 0.992896, 1.7316937, 0.9249825, 1.031242, 
4.9168736, 2.7092267, 1.0511894, 1.0526788, 1.0565391, 1.0764399, 
0.8969808, 1.490155, 1.2981336, 5.8849308, 5.5543907, 2.3143402, 
2.906642, 3.190377), VOT = c(0.026688227, 0.053873637, 0.034600502, 
0.015246856, 0.012237697, 0.057330285, 0.01192124, 0.01732151, 
0.017444548, 0.017133051, 0.014753655, 0.009625276, 0.016766487, 
0.027441848, 0.0737298, 0.027602911, 0.043993992, 0.059935975, 
0.07010274, 0.025616875, 0.055951297, 0.039080006, 0.022465927, 
0.032889385, 0.010705113, 0.012967303, 0.015396249, 0.009897444, 
0.032001336, 0.046939881, 0.026724766, 0.019066545, 0.020054339, 
0.018826236, 0.060663448, 0.051022693, 0.023050381, 0.009959057, 
0.014804389, 0.016683009, 0.024245469, 0.03164039, 0.020648408, 
0.062447475, 0.0665083, 0.024059111, 0.017843229, 0.010648602, 
0.010570399, 0.023740635, 0.019817196, 0.020791699, 0.011780412, 
0.015373779, 0.01250764, 0.015723396, 0.011632927, 0.00992896, 
0.017316937, 0.009249825, 0.01031242, 0.049168736, 0.027092267, 
0.010511894, 0.010526788, 0.010565391, 0.010764399, 0.008969808, 
0.01490155, 0.012981336, 0.058849308, 0.055543907, 0.023143402, 
0.02906642, 0.03190377), xsampa = c("A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A"), Place = c("Velar", "Velar", "Velar", 
"Retroflex", "Retroflex", "Velar", "Bilabial", "Bilabial", "Retroflex", 
"Retroflex", "Dental/alveolar", "Dental/alveolar", "Dental/alveolar", 
"Dental/alveolar", "Dental/alveolar", "Dental/alveolar", "Dental/alveolar", 
"Palatal", "Palatal", "Dental/alveolar", "Palatal", "Palatal", 
"Velar", "Velar", "Bilabial", "Bilabial", "Bilabial", "Bilabial", 
"Velar", "Velar", "Velar", "Velar", "Palatal", "Palatal", "Velar", 
"Velar", "Bilabial", "Bilabial", "Retroflex", "Dental/alveolar", 
"Dental/alveolar", "Dental/alveolar", "Dental/alveolar", "Dental/alveolar", 
"Dental/alveolar", "Dental/alveolar", "Dental/alveolar", "Dental/alveolar", 
"Dental/alveolar", "Velar", "Velar", "Bilabial", "Bilabial", 
"Bilabial", "Bilabial", "Bilabial", "Bilabial", "Bilabial", "Bilabial", 
"Retroflex", "Retroflex", "Velar", "Velar", "Bilabial", "Bilabial", 
"Retroflex", "Retroflex", "Dental/alveolar", "Dental/alveolar", 
"Dental/alveolar", "Dental/alveolar", "Dental/alveolar", "Dental/alveolar", 
"Dental/alveolar", "Palatal"), Manner = c("Stop", "Stop", "Stop", 
"Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", 
"Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Palatal", "Palatal", 
"Stop", "Palatal", "Palatal", "Stop", "Stop", "Stop", "Stop", 
"Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", 
"Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", 
"Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", 
"Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", 
"Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", 
"Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", "Stop", 
"Palatal"), Voicing = c("Voiced", "Voiced", "Voiced", "Voiceless", 
"Voiceless", "Voiceless", "Voiceless", "Voiceless", "Voiceless", 
"Voiceless", "Voiced", "Voiced", "Voiced", "Voiced", "Voiceless", 
"Voiceless", "Voiceless", "Voiced", "Voiced", "Voiced", "Voiced", 
"Voiced", "Voiceless", "Voiceless", "Voiceless", "Voiceless", 
"Voiceless", "Voiceless", "Voiced", "Voiced", "Voiced", "Voiced", 
"Voiceless", "Voiceless", "Voiceless", "Voiceless", "Voiceless", 
"Voiceless", "Voiceless", "Voiced", "Voiced", "Voiced", "Voiced", 
"Voiceless", "Voiceless", "Voiceless", "Voiceless", "Voiced", 
"Voiced", "Voiceless", "Voiceless", "Voiceless", "Voiceless", 
"Voiceless", "Voiceless", "Voiced", "Voiced", "Voiced", "Voiced", 
"Voiceless", "Voiceless", "Voiceless", "Voiceless", "Voiceless", 
"Voiceless", "Voiceless", "Voiceless", "Voiced", "Voiced", "Voiced", 
"Voiceless", "Voiceless", "Voiceless", "Voiceless", "Voiced")), 
class = "data.frame", row.names = c(NA, -75L))
于 2020-11-17T04:50:55.517 回答