1

当我使用 OLEDB 时,从 Excel 表中读取 3200 行只需要 2 - 3 秒。我更改为 OpenXML 格式,现在从 Excel 表中读取 3200 行需要 1 多分钟。

下面是我的代码:

public static DataTable ReadExcelFileDOM(string filename)
{
    DataTable table;

    using (SpreadsheetDocument myDoc = SpreadsheetDocument.Open(filename, true))
    {
        WorkbookPart workbookPart = myDoc.WorkbookPart;
        Sheet worksheet = workbookPart.Workbook.Descendants<Sheet>().First();
        WorksheetPart worksheetPart =
         (WorksheetPart)(workbookPart.GetPartById(worksheet.Id));
        SheetData sheetData =
            worksheetPart.Worksheet.Elements<SheetData>().First();
        List<List<string>> totalRows = new List<List<string>>();
        int maxCol = 0;

        foreach (Row r in sheetData.Elements<Row>())
        {
            // Add the empty row.
            string value = null;
            while (totalRows.Count < r.RowIndex - 1)
            {
                List<string> emptyRowValues = new List<string>();
                for (int i = 0; i < maxCol; i++)
                {
                    emptyRowValues.Add("");
                }
                totalRows.Add(emptyRowValues);
            }


            List<string> tempRowValues = new List<string>();
            foreach (Cell c in r.Elements<Cell>())
            {
                #region get the cell value of c.
                if (c != null)
                {
                    value = c.InnerText;

                    // If the cell represents a numeric value, you are done. 
                    // For dates, this code returns the serialized value that 
                    // represents the date. The code handles strings and Booleans
                    // individually. For shared strings, the code looks up the 
                    // corresponding value in the shared string table. For Booleans, 
                    // the code converts the value into the words TRUE or FALSE.
                    if (c.DataType != null)
                    {
                        switch (c.DataType.Value)
                        {
                            case CellValues.SharedString:
                                // For shared strings, look up the value in the shared 
                                // strings table.
                                var stringTable = workbookPart.
                                    GetPartsOfType<SharedStringTablePart>().FirstOrDefault();

                                // If the shared string table is missing, something is 
                                // wrong. Return the index that you found in the cell.
                                // Otherwise, look up the correct text in the table.
                                if (stringTable != null)
                                {
                                    value = stringTable.SharedStringTable.
                                        ElementAt(int.Parse(value)).InnerText;
                                }
                                break;

                            case CellValues.Boolean:
                                switch (value)
                                {
                                    case "0":
                                        value = "FALSE";
                                        break;
                                    default:
                                        value = "TRUE";
                                        break;
                                }
                                break;
                        }
                    }

                    Console.Write(value + "  ");
                }
                #endregion

                // Add the cell to the row list.
                int i = Convert.ToInt32(c.CellReference.ToString().ToCharArray().First() - 'A');

                // Add the blank cell in the row.
                while (tempRowValues.Count < i)
                {
                    tempRowValues.Add("");
                }
                tempRowValues.Add(value);
            }

            // add the row to the totalRows.
            maxCol = processList(tempRowValues, totalRows, maxCol);

            Console.WriteLine();
        }

        table = ConvertListListStringToDataTable(totalRows, maxCol);
    }
    return table;
}

/// <summary>
/// Add each row to the totalRows.
/// </summary>
/// <param name="tempRows"></param>
/// <param name="totalRows"></param>
/// <param name="MaxCol">the max column number in rows of the totalRows</param>
/// <returns></returns>
private static int processList(List<string> tempRows, List<List<string>> totalRows, int MaxCol)
{
    if (tempRows.Count > MaxCol)
    {
        MaxCol = tempRows.Count;
    }

    totalRows.Add(tempRows);
    return MaxCol;
}

private static DataTable ConvertListListStringToDataTable(List<List<string>> totalRows, int maxCol)
{
    DataTable table = new DataTable();
    for (int i = 0; i < maxCol; i++)
    {
        table.Columns.Add();
    }
    foreach (List<string> row in totalRows)
    {
        while (row.Count < maxCol)
        {
            row.Add("");
        }
        table.Rows.Add(row.ToArray());
    }
    return table;
}

有没有一种有效的方法可以在某处更改此代码,以便读取过程更快一些?如何将其更改为代码以更快地阅读?

4

1 回答 1

0

您是否尝试过 SAX 方法?DOM 方法较慢,因为它加载到 DOM 中。

http://blogs.msdn.com/b/brian_jones/archive/2010/05/27/parsing-and-reading-large-excel-files-with-the-open-xml-sdk.aspx

如果您确定每个单元格都有一个单元格引用(例如“A1”),那么只需解析所有 Cell 类(而不是先解析 Row 类,然后再解析子 Cell 类)。我相信 Microsoft Excel 可以做到这一点。根据 Open XML 规范,单元格引用是一个可选属性。

于 2012-10-11T04:27:28.553 回答