3

我需要读取一些 xlsx 文件。我已经阅读了《使用 Open XML SDK 从 xlsx 读取日期》以及 http://www.dotnetperls.com/fromoadate 。我的大部分列都是文本(共享字符串),但也有一些是数字(整数),还有一些是日期和日期时间。我正在使用 OpenXML SDK 2.5。

我的问题是我不知道如何区分真正的数字和日期。它们的 DataType 都是 null,而数字的文本表示存放在单元格的 Text 属性中。

一些代码:

  // Dumps every cell of the first worksheet of the embedded "Checklist.xlsx" resource to the
  // console: cells are separated by a space and each spreadsheet row ends with a newline.
  // NOTE(review): `assembly` is defined outside this excerpt, and the closing brace of the
  // using block below is missing from the excerpt.
  using (var xlsxStream = assembly.GetManifestResourceStream("Checklist.xlsx"))
  using (var spreadsheetDocument = SpreadsheetDocument.Open(xlsxStream, false))
  {
    var workbookPart = spreadsheetDocument.WorkbookPart;
    var sharedStringTable = workbookPart.SharedStringTablePart.SharedStringTable;
    var worksheetPart = workbookPart.WorksheetParts.First();
    var sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
    // Holds the printable text of the cell currently being processed.
    string text;
    foreach (Row r in sheetData.Elements<Row>())
    {
      foreach (Cell c in r.Elements<Cell>())
      {
        // Cells without a stored value are printed as "NULL" (see the else branch).
        if (c.CellValue != null)
        {
          text = c.CellValue.Text;
          if (c.DataType != null)
          {
            if (c.DataType.Value == CellValues.SharedString)
            {
              // For shared strings the cell value is an index into the shared string
              // table; replace it with the text stored at that index.
              int tableIndex = int.Parse(text);
              text = sharedStringTable.ChildElements[tableIndex].InnerText;
            }
            // note: the date cells do not have c.DataType.Value == CellValues.Date
            // Their c.DataType is null, if they are OLE Automation date numbers
          }
          // So here I am, and I'd need to know if the number supposed to be an
          // OLE Automation date or a number, so I can transform it if needed.
          //if (it's a date) // <- ?????
          //{
          //    double dateDouble = double.Parse(text);
          //    DateTime dateTime = DateTime.FromOADate(dateDouble);
          //    text = dateTime.ToShortDateString();
          //}
          Console.Write(text + " ");
        }
        else
        {
          Console.Write("NULL" + " ");
        }
      }
      // End of the spreadsheet row: terminate the output line.
      Console.WriteLine();
    }
    Console.WriteLine();
    // Wait for a key press before continuing.
    Console.ReadKey();
4

2 回答 2

11

我刚遇到类似的问题,检查单元格是否包含日期/时间值并不容易,请参阅使用单元格格式确定单元格是否包含日期/时间值,但问题并没有以内置数字格式结束,我也需要处理自定义格式。OpenXML SDK 2.5 中没有实用程序可以提供帮助,所以我必须自己编写(不支持泰语日期/时间格式)。

/// <summary>
/// Helpers for deciding whether a spreadsheet cell is formatted as a date/time value,
/// based on the number format referenced by the cell's style.
/// </summary>
public class ExcelHelper
{
    // Built-in number format IDs that this helper treats as date/time formats.
    // NOTE(review): IDs above 22 are presumably the locale-specific date/time formats
    // listed in ECMA-376 - verify against the specification before extending the list.
    private static readonly uint[] builtInDateTimeNumberFormatIDs = new uint[] { 14, 15, 16, 17, 18, 19, 20, 21, 22, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58 };

    // Placeholder NumberingFormat objects (ID only, no format code) for the built-in IDs above.
    private static readonly Dictionary<uint, NumberingFormat> builtInDateTimeNumberFormats = builtInDateTimeNumberFormatIDs.ToDictionary(id => id, id => new NumberingFormat { NumberFormatId = id });

    // Text that is emitted literally and therefore must not be read as date/time tokens:
    // quoted strings ("..."), escaped characters (\x), padding (_x) and fill characters (*x).
    private static readonly Regex literalTextRegex = new Regex(@"""[^""]*""|\\.|_.|\*.", RegexOptions.Compiled);

    // Elapsed-time sections such as [h], [mm] or [ss]; the only bracketed sections that mark a time format.
    private static readonly Regex elapsedTimeRegex = new Regex(@"\[(h+|m+|s+)\]", RegexOptions.Compiled);

    // Any other bracketed section, e.g. colors ([Red]), conditions ([>100]) or locale/currency tags ([$-409]).
    private static readonly Regex bracketSectionRegex = new Regex(@"\[[^\]]*\]", RegexOptions.Compiled);

    // Date/time token letters, checked only after literals and bracketed sections were removed.
    private static readonly Regex dateTimeFormatRegex = new Regex(@"[ymdhs]", RegexOptions.Compiled);

    /// <summary>
    /// Determines whether a number format code describes a date/time format.
    /// </summary>
    /// <param name="formatCode">The format code of a numbering format; may be null or empty.</param>
    /// <returns>
    /// <c>true</c> when the code contains an elapsed-time section or a y/m/d/h/s token outside
    /// of literal text and bracketed sections; otherwise <c>false</c>.
    /// </returns>
    private static bool IsDateTimeFormatCode(string formatCode)
    {
        if (string.IsNullOrEmpty(formatCode))
        {
            return false;
        }

        // Literal text first, so that letters inside "..." or after '\' are never seen as tokens.
        string withoutLiterals = literalTextRegex.Replace(formatCode, string.Empty);
        if (elapsedTimeRegex.IsMatch(withoutLiterals))
        {
            return true;
        }

        // Without this step a color such as [Red] would be classified as a date because of its 'd',
        // which turns currency formats like "$"#,##0_);[Red]\("$"#,##0\) into date formats.
        string tokensOnly = bracketSectionRegex.Replace(withoutLiterals, string.Empty);
        return dateTimeFormatRegex.IsMatch(tokensOnly);
    }

    /// <summary>
    /// Builds a map from cell style index (the position of a cell format in the stylesheet's
    /// cell format list) to the date/time numbering format that style applies.
    /// </summary>
    /// <param name="workbookPart">The workbook part whose stylesheet is inspected.</param>
    /// <returns>
    /// A dictionary keyed by style index. It is empty when the workbook has no stylesheet or
    /// no cell formats. For built-in formats the value only carries the number format ID.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="workbookPart"/> is null.</exception>
    public static Dictionary<uint, NumberingFormat> GetDateTimeCellFormats(WorkbookPart workbookPart)
    {
        if (workbookPart == null)
        {
            throw new System.ArgumentNullException("workbookPart");
        }

        var dateCellFormats = new Dictionary<uint, NumberingFormat>();

        var stylesPart = workbookPart.WorkbookStylesPart;
        if (stylesPart == null || stylesPart.Stylesheet == null || stylesPart.Stylesheet.CellFormats == null)
        {
            return dateCellFormats;
        }

        var stylesheet = stylesPart.Stylesheet;

        // Formats declared by the workbook itself. NumberingFormats is absent when the workbook
        // declares none, so it has to be checked before it is enumerated.
        var dateNumberFormats = new Dictionary<uint, NumberingFormat>();
        if (stylesheet.NumberingFormats != null)
        {
            foreach (var numberingFormat in stylesheet.NumberingFormats.Descendants<NumberingFormat>())
            {
                if (numberingFormat.NumberFormatId == null || numberingFormat.FormatCode == null)
                {
                    continue;
                }

                if (IsDateTimeFormatCode(numberingFormat.FormatCode.Value))
                {
                    // Indexer instead of Add: a duplicated ID must not abort the whole scan.
                    dateNumberFormats[numberingFormat.NumberFormatId.Value] = numberingFormat;
                }
            }
        }

        uint styleIndex = 0;
        foreach (var cellFormat in stylesheet.CellFormats.Descendants<CellFormat>())
        {
            bool appliesNumberFormat = cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value;
            if (appliesNumberFormat && cellFormat.NumberFormatId != null)
            {
                uint numberFormatId = cellFormat.NumberFormatId.Value;
                NumberingFormat dateFormat;

                // A format declared in the workbook wins over the built-in table.
                if (dateNumberFormats.TryGetValue(numberFormatId, out dateFormat)
                    || builtInDateTimeNumberFormats.TryGetValue(numberFormatId, out dateFormat))
                {
                    dateCellFormats.Add(styleIndex, dateFormat);
                }
            }

            // The style index counts every cell format, including the ones that are skipped.
            styleIndex++;
        }

        return dateCellFormats;
    }

    /// <summary>
    /// Usage example: determines whether a cell is styled with a date/time number format.
    /// </summary>
    /// <remarks>
    /// This overload rebuilds the style map on every call. When testing many cells, call
    /// <see cref="GetDateTimeCellFormats"/> once and use the overload that accepts the map.
    /// </remarks>
    /// <param name="workbookPart">The workbook part that owns the cell.</param>
    /// <param name="cell">The cell to test.</param>
    /// <returns><c>true</c> when the cell's style applies a date/time number format; otherwise <c>false</c>.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="cell"/> is null, or
    /// <paramref name="workbookPart"/> is null while the cell has a style index.</exception>
    public static bool IsDateTimeCell(WorkbookPart workbookPart, Cell cell)
    {
        if (cell == null)
        {
            throw new System.ArgumentNullException("cell");
        }

        // A cell without a style cannot reference a date/time format; skip building the map.
        if (cell.StyleIndex == null)
        {
            return false;
        }

        return IsDateTimeCell(ExcelHelper.GetDateTimeCellFormats(workbookPart), cell);
    }

    /// <summary>
    /// Determines whether a cell is styled with a date/time number format, using a style map
    /// previously obtained from <see cref="GetDateTimeCellFormats"/>.
    /// </summary>
    /// <param name="dateTimeCellFormats">The style map returned by <see cref="GetDateTimeCellFormats"/>.</param>
    /// <param name="cell">The cell to test.</param>
    /// <returns><c>true</c> when the cell's style index is present in the map; otherwise <c>false</c>.</returns>
    /// <exception cref="System.ArgumentNullException">An argument is null.</exception>
    public static bool IsDateTimeCell(Dictionary<uint, NumberingFormat> dateTimeCellFormats, Cell cell)
    {
        if (dateTimeCellFormats == null)
        {
            throw new System.ArgumentNullException("dateTimeCellFormats");
        }

        if (cell == null)
        {
            throw new System.ArgumentNullException("cell");
        }

        if (cell.StyleIndex == null)
        {
            return false;
        }

        return dateTimeCellFormats.ContainsKey(cell.StyleIndex.Value);
    }
}
于 2013-10-25T06:34:01.687 回答
1

此答案是上述已接受答案的改进版本,用于修复它会出错的一种情况:当单元格为 Currency 类型并应用了内置格式 "$"#,##0_);[Red]\("$"#,##0\) 时,已接受答案的判断会失败。

已接受答案中的正则表达式 ((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*) 也会匹配上述内置格式,结果该单元格被当成日期时间单元格,返回的值是日期时间值,而不是货币值。

我对 GetDateTimeCellFormats 方法稍作修改,使正则表达式不会干扰任何不属于日期/时间/日期时间的内置格式。

        // Result: style index (position within cellFormatList) -> date/time numbering format.
        var dateCellFormats = new Dictionary<uint, NumberingFormat>();

        // IDs below this value are looked up in the built-in table only; all other IDs are
        // looked up in the formats collected from the workbook (dateNumberFormatsDict).
        const uint firstCustomNumberFormatId = 164;

        uint styleIndex = 0;
        foreach (var cellFormat in cellFormatList)
        {
            // Only cell formats that explicitly apply a number format are considered.
            if (cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value)
            {
                var numFmtId = cellFormat.NumberFormatId.Value;
                NumberingFormat matchedFormat;

                if (numFmtId < firstCustomNumberFormatId)
                {
                    if (builtInDateTimeNumberFormats.TryGetValue(numFmtId, out matchedFormat))
                    {
                        dateCellFormats.Add(styleIndex, matchedFormat);
                    }
                }
                else if (dateNumberFormatsDict.TryGetValue(numFmtId, out matchedFormat))
                {
                    dateCellFormats.Add(styleIndex, matchedFormat);
                }
            }

            // Every cell format advances the index, whether or not it was recorded.
            styleIndex++;
        }

其余所有代码保持不变。

于 2022-01-13T07:03:37.090 回答