I have a data frame (below) whose first column (`ID`) I want to use as the row names. However, this column contains duplicates. Rather than simply using `!duplicated` to remove the duplicates, I want to keep, for each duplicated ID, the row that does not have an `NA` in a specified column, for example the column named `UHU` in the example data below (or any other column that I pass as an argument). What is the most efficient way of doing this without looping through each set of duplicates and checking the original against its duplicate for an `NA`? I'd also like to be able to specify the columns to check for `NA`s in order of preference, i.e. first select the duplicate with no `NA` in the `UHU` column, and if the duplicates are tied on that, check another column, and so on.
# Example data for the question above: a data.frame literal (looks like
# dput() output) with row.names = c(NA, 100L), i.e. 100 rows, and five columns.
# The expression is not assigned to a name here; assign it (e.g. `df <- ...`)
# before use.
#
# Columns:
#   ID                       character key; many values repeat (the duplicates
#                            the question wants to resolve)
#   UHU                      logical; only TRUE/FALSE appear in this sample,
#                            there is no NA in this column
#   days_to_selection_end    character; digit strings, "[Completed]", real NA,
#                            and the literal string "NA"
#   days_to_selection_start  character; digit strings, "[Completed]", real NA
#   selection_name           character; names, real NA, and one literal "NA"
#
# NOTE(review): the quoted "NA" entries are ordinary strings, so is.na() returns
# FALSE for them. If they are meant to be missing values they must be converted
# first -- confirm with the data owner.
structure(list(ID = c("A1_0SM", "A1_0SP", "A2_04U",
"A2_04U", "A2_04U", "A2_04U", "A2_0CM",
"A2_0CM", "A2_0CM", "A2_0CM", "A2_0CM",
"A2_0CM", "A2_0D0", "A2_0D0", "A2_0D2",
"A2_0D2", "A2_0D2", "A2_0SX", "A2_0SX",
"A2_0SX", "A2_0SX", "A2_0SX", "A2_0T0",
"A2_0T0", "A2_0T0", "A2_0T2", "A2_0YE",
"A2_0YE", "A2_0YE", "A2_1G6", "A2_1G6",
"A2_1G6", "A7_0DA", "A7_0DA", "A7_0DA",
"A7_26G", "A7_26G", "A8_07C", "A8_07O",
"A8_08R", "A8_09X", "AC_2QH", "AN_04D",
"AN_0AL", "AN_0AR", "AN_0AT", "AN_0G0",
"AN_0XU", "AO_03U", "AO_03U", "AO_03U",
"AO_03U", "AO_03U", "AO_03U", "AO_0J4",
"AO_0J4", "AO_0J4", "AO_0J6", "AO_0J6",
"AO_0J6", "AO_0JL", "AO_0JL", "AO_0JL",
"AO_0JL", "AO_124", "AO_124", "AO_124",
"AO_128", "AO_128", "AO_128", "AO_128",
"AO_129", "AO_129", "AO_129", "AO_12F",
"AO_12F", "AO_12F", "AO_1MR", "AQ_04J",
"AQ_04J", "AQ_04J", "AQ_04J", "AQ_04J",
"AQ_04J", "AR_0TS", "AR_0TU", "AR_0U1",
"AR_0U4", "AR_1AR", "AR_1AR", "AR_1AY",
"AR_256", "AR_2LR", "BH_0B3", "BH_0B3",
"BH_0B3", "BH_0B9", "BH_0B9", "BH_0BG",
"BH_0BL"), UHU = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE,
TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE,
TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE), days_to_selection_end = c(NA, "172", "196", "119",
"119", "670", "147", "147", "601", "615", "433", "NA",  # last item is the string "NA", not a missing value
"125", "125", "123", "123", "179", "1359", "132", "1359", "132",
"NA", "234", "234", "234", "212", "[Completed]",  # leading "NA" is also a string
"[Completed]", "[Completed]", "172", "119", "119", "198", "107",
"107", "151", "151", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, "215", "215", "215", "215", "215", "215", "175", "203", "175",
"172", "116", "116", "178", "493", "122", "122", "125", "125",
"237", "124", "124", "124", "124", "161", "161", "161", "189",
"189", "189", NA, "104", "104", "159", "104", "104", "159", NA,
NA, NA, NA, NA, NA, NA, NA, NA, "213", "115", "115", "111", "111",
NA, NA), days_to_selection_start = c(NA, "107", "133", "78",
"78", "480", "98", "98", "391", "391", "391", "234", "62", "62",
"74", "74", "137", "1289", "62", "1289", "62", "1429", "61",
"61", "61", "92", "[Completed]", "[Completed]", "[Completed]",
"132", "69", "69", "122", "65", "65", "89", "89", NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, "70", "70", "70", "70", "70",
"70", "131", "189", "131", "130", "74", "74", "136", "136", "80",
"80", "60", "60", "146", "82", "82", "82", "82", "67", "67",
"67", "63", "63", "63", NA, "61", "61", "117", "61", "61", "117",
NA, NA, NA, NA, NA, NA, NA, NA, NA, "136", "52", "52", "48",
"48", NA, NA), selection_name = c(NA, "NA", "T0101tere",  # second element is the string "NA"
"nw8m100", "991xan", "kkw", "991xan", "nw8m100", "i11io18el",
"Cape1000", "kkw", "99f101fen", "991xan", "i11io18el",
"991xan", "nw8m100", "T0101l", "82eplan18", "nw8m100",
"Gem1000", "991xan", "Xeloda", "T0101tere", "991xan", "nw8m100",
"Xeloda", "T0101tere", "nw8m100", "991xan", "i11io18el", "nw8m100",
"991xan", "T0101l", "nw8m100", "991xan", "991xan", "T0101tere",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "iu18101", "iu18101",
"111o1018", "98aj", "98aj", "111o1018",
"fox", "114iio18el", "teo882s", "114Iio18EL",
"teO882s", "98aj", "114Iio18EL", "Uwm",
"teO882s", "98aj", "teo882s", "98aj",
"114iio18el", "98aj", "teo882s", "teo882s",
"98aj", "98aj", "114iio18el", "teo882s",
"114iio18el", "98aj", "teo882s", NA, "991xan",
"991xan", "T0101l", "nw8m100", "nw8m100", "T0101l", NA,
NA, NA, NA, NA, NA, NA, NA, NA, "114iio18el", "teo882s",
"98aj", "teo882s", "98aj", NA, NA
)), .Names = c("ID", "UHU", "days_to_selection_end",
"days_to_selection_start", "selection_name"), row.names = c(NA,
100L), class = "data.frame")