3

从 DataTable 中按多列删除重复行,哪种方法最合适?下面的代码只适用于单列。

/// <summary>
/// Removes from <paramref name="dTable"/> every row whose value in column
/// <paramref name="colName"/> already occurred in an earlier row.
/// </summary>
/// <param name="dTable">Table to de-duplicate; it is modified in place.</param>
/// <param name="colName">Name of the single column whose value identifies a duplicate.</param>
/// <returns>The same <paramref name="dTable"/> instance, with the duplicate rows removed.</returns>
public DataTable RemoveDuplicateRows(DataTable dTable, string colName)
{
    Hashtable seenValues = new Hashtable();
    ArrayList rowsToRemove = new ArrayList();

    // First pass: remember each column value the first time it appears and
    // collect every later row that repeats an already-seen value.
    for (int i = 0; i < dTable.Rows.Count; i++)
    {
        DataRow current = dTable.Rows[i];
        object key = current[colName];

        if (!seenValues.Contains(key))
        {
            seenValues.Add(key, string.Empty);
            continue;
        }

        rowsToRemove.Add(current);
    }

    // Second pass: removal is deferred so the row collection is not modified
    // while it is being traversed.
    for (int i = 0; i < rowsToRemove.Count; i++)
    {
        dTable.Rows.Remove((DataRow)rowsToRemove[i]);
    }

    // The table now holds only the first row for each distinct column value.
    return dTable;
}

我尝试改用 string[] colName,但它在 dTable.Rows.Remove(dRow); 处出错。

请建议。

4

3 回答 3

3

最简单和最易读的方法是使用Linq-to-DataTable

// Group the rows by the combination of columns that defines a duplicate.
// Anonymous types compare their properties by value, so rows sharing the same
// Column1/Column2 pair end up in the same group.
var groups = from r in dTable.AsEnumerable()
             group r by new
             {
                 Col1 = r.Field<String>("Column1"),
                 Col2 = r.Field<String>("Column2"),
             };

// Keep only the first row of each group (materialized once so it is not re-evaluated).
var distinctRows = groups.Select(g => g.First()).ToList();

// CopyToDataTable throws InvalidOperationException when the sequence contains no rows,
// so for an empty input return an empty table with the same schema instead.
DataTable distinctTable = distinctRows.Count > 0
    ? distinctRows.CopyToDataTable()
    : dTable.Clone();

注意:Enumerable.GroupBy 按一个具有两个属性(Col1 和 Col2)的匿名类型对 DataRows 进行分组,这两个属性分别由 DataRow 的字段 Column1 和 Column2 初始化。

这样你就得到了若干组 IEnumerable<DataRow>。Enumerable.First() 返回每个组的第一个 DataRow(您也可以使用其他方法来选择要保留的行,例如先按日期字段排序)。

然后CopyToDataTable从(现在)不同的 DataRows 创建一个新的 DataTable。


如果您使用的是 .NET 2,这是一个可能的实现:

为字典自定义的 IEqualityComparer<Object[]> 实现:

/// <summary>
/// Compares <c>Object[]</c> instances element by element, so that an array of column
/// values can be used as a composite key in a dictionary.
/// </summary>
class ObjectArrayComparer : IEqualityComparer<Object[]>
{
    /// <summary>
    /// Returns true when both arrays are null, or when they have the same length and
    /// every pair of elements at the same index is equal (two null elements count as equal).
    /// </summary>
    public bool Equals(Object[] x, Object[] y)
    {
        // Same instance, or both null.
        if (ReferenceEquals(x, y)) return true;
        if (x == null || y == null) return false;
        if (x.Length != y.Length) return false;

        for (int i = 0; i < x.Length; i++)
        {
            // The static Object.Equals handles a null element on either side.
            if (!Object.Equals(x[i], y[i])) return false;
        }
        return true;
    }

    /// <summary>
    /// Combines the array length and the hash codes of all elements, in order.
    /// Returns 0 for a null array; null elements contribute 0.
    /// </summary>
    public int GetHashCode(Object[] obj)
    {
        if (obj == null) return 0;

        // Overflow is expected while mixing hash codes; wrap explicitly so the comparer
        // does not throw OverflowException when compiled with checked arithmetic.
        unchecked
        {
            int hash = obj.Length;
            foreach (Object o in obj)
            {
                hash = (hash * 17) + (o != null ? o.GetHashCode() : 0);
            }
            return hash;
        }
    }
}

你的RemoveDuplicateRows方法:

/// <summary>
/// Builds a new table that contains, for each distinct combination of values in the
/// columns <paramref name="colNames"/>, only the first row of <paramref name="dTable"/>
/// that has that combination. The input table is left untouched.
/// </summary>
/// <param name="dTable">Source table.</param>
/// <param name="colNames">Names of the columns that together identify a duplicate.</param>
/// <returns>A new table with the same schema, holding the distinct rows in source order.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="dTable"/> or <paramref name="colNames"/> is null.
/// </exception>
public DataTable RemoveDuplicateRows(DataTable dTable, String[] colNames)
{
    if (dTable == null) throw new ArgumentNullException("dTable");
    if (colNames == null) throw new ArgumentNullException("colNames");

    // Clone copies the schema only; the distinct rows are imported below.
    DataTable clone = dTable.Clone();
    var seenKeys = new Dictionary<object[], DataRow>(new ObjectArrayComparer());

    foreach (DataRow drow in dTable.Rows)
    {
        // Composite key: the row's values in the requested columns.
        Object[] key = new Object[colNames.Length];
        for (int c = 0; c < colNames.Length; c++)
        {
            key[c] = drow[colNames[c]];
        }

        if (seenKeys.ContainsKey(key))
        {
            continue;
        }

        seenKeys.Add(key, drow);

        // Import while walking the source rows, so the output order never depends on
        // the dictionary's enumeration order (which is documented as undefined).
        clone.ImportRow(drow);
    }

    return clone;
}

测试:

// Build a three-column sample table; only the first two columns take part in the
// duplicate check below.
DataTable table = new DataTable();
table.Columns.Add("Colum1", typeof(string));
table.Columns.Add("Colum2", typeof(int));
table.Columns.Add("Colum3", typeof(string));

// Fill it with 100 rows of random values; each random part is in the range 1..9.
Random r = new Random();
for (int n = 0; n < 100; n++)
{
    string first = "Colum1_" + r.Next(1, 10);
    int second = r.Next(1, 10);
    string third = "Colum3_" + r.Next(1, 10);
    table.Rows.Add(first, second, third);
}

int rowCount = table.Rows.Count; // 100

// De-duplicate on the first two columns only.
DataTable unique = RemoveDuplicateRows(table, new[] { "Colum1", "Colum2" });
int uniqueRowCount = unique.Rows.Count; // around 55-65
于 2012-10-17T12:29:54.443 回答
0

您可以在 Datatable.Select ... Link上使用 Distinct

参考这个链接

于 2012-10-17T12:26:49.870 回答
0

下面是你的代码,我做了几个修改。
主要思想是:添加到 HashTable(在我的代码中是 Dictionary<>)的不再只是单列的值,而是所有指定列的值,并把这几个值作为一个整体(就像单个值一样)来处理。

            // your code with minor amends
    /// <summary>
    /// Removes from <paramref name="dTable"/> every row whose combination of values in
    /// the columns <paramref name="colNames"/> already occurred in an earlier row.
    /// </summary>
    /// <param name="dTable">Table to de-duplicate; it is modified in place.</param>
    /// <param name="colNames">Names of the columns that together identify a duplicate.</param>
    /// <returns>The same <paramref name="dTable"/> instance, with the duplicate rows removed.</returns>
    public DataTable RemoveDuplicateRows(DataTable dTable, string[] colNames)
    {
        // Set of composite keys seen so far; membership relies on the
        // Equals/GetHashCode overrides of DataRowInfo.
        var seenKeys = new HashSet<DataRowInfo>();
        var duplicates = new List<DataRow>();

        foreach (DataRow drow in dTable.Rows)
        {
            // HashSet.Add returns false when an equal key is already present,
            // which marks this row as a duplicate in a single lookup.
            if (!seenKeys.Add(new DataRowInfo(drow, colNames)))
            {
                duplicates.Add(drow);
            }
        }

        // Removal is deferred so the row collection is not modified while it is enumerated.
        foreach (DataRow duplicate in duplicates)
        {
            dTable.Rows.Remove(duplicate);
        }

        // The table now holds only the first row for each distinct key.
        return dTable;
    }

    // Helper classes

    /// <summary>
    /// Holds the values one row has in a chosen set of columns and gives them
    /// value-based equality, so an instance can serve as a composite key in
    /// hash-based collections.
    /// </summary>
    internal sealed class DataRowInfo
    {
        /// <summary>The row's values, in the order the columns were specified.</summary>
        public object[] Values { get; private set; }

        /// <summary>Reads the values of <paramref name="columns"/> from <paramref name="dataRow"/>.</summary>
        /// <param name="dataRow">Row to read the values from.</param>
        /// <param name="columns">Names of the columns whose values form the key.</param>
        public DataRowInfo(DataRow dataRow, string[] columns)
        {
            Values = columns.Select(c => dataRow[c]).ToArray();
        }

        /// <summary>
        /// Two instances are equal when they hold the same number of values and the
        /// values at every position are equal.
        /// </summary>
        public override bool Equals(object obj)
        {
            if (ReferenceEquals(this, obj))
            {
                return true;
            }

            var other = obj as DataRowInfo;
            if (other == null)
            {
                return false;
            }

            return Equals(other);
        }

        // Position-by-position comparison of the stored values.
        private bool Equals(DataRowInfo other)
        {
            if (this.Values.Length != other.Values.Length)
            {
                return false;
            }

            for (int i = 0; i < this.Values.Length; i++)
            {
                // A single differing value makes the keys different. (The check used to be
                // inverted, which reported rows with equal values as different.)
                if (!AreObjectsEqual(this.Values[i], other.Values[i]))
                {
                    return false;
                }
            }

            return true;
        }

        // Null-safe equality that also requires both values to have the same runtime type.
        private static bool AreObjectsEqual(object left, object right)
        {
            if (ReferenceEquals(left, right))
            {
                return true;
            }

            if (ReferenceEquals(left, null) || ReferenceEquals(right, null))
            {
                return false;
            }

            if (left.GetType() != right.GetType())
            {
                return false;
            }

            return left.Equals(right);
        }

        /// <summary>Combines the hash codes of all values; null values contribute 0.</summary>
        public override int GetHashCode()
        {
            unchecked
            {
                int hashCode = 0;
                foreach (var value in this.Values)
                {
                    // Multiply the running hash before mixing in the next value so the result
                    // depends on position; a plain XOR of the values would cancel identical
                    // values out and ignore their order.
                    hashCode = (hashCode * 397) ^ (value != null ? value.GetHashCode() : 0);
                }
                return hashCode;
            }
        }
    }

希望这会有所帮助。

更新 简化代码。

于 2012-10-17T12:48:06.893 回答