How to distinguish inline numbers from OLE Automation date numbers in OpenXML SpreadSheet?

我怕爱的太早我们不能终老 提交于 2019-12-18 17:36:52

问题


I have to consume some xlsx documents. I've read Reading a date from xlsx using open xml sdk and http://www.dotnetperls.com/fromoadate. Most of my columns are texts (shared strings), but there are some numbers (integer numbers), and I have also some dates and date-times. I'm using OpenXML SDK 2.5.

My problem is that I don't know how to distinguish the actual numbers from the dates. Both of them has DataType of null, and the textual number representation is in the Text property of the cell.

Some code:

  using (var xlsxStream = assembly.GetManifestResourceStream("Checklist.xlsx"))
  using (var spreadsheetDocument = SpreadsheetDocument.Open(xlsxStream, false))
  {
    var workbookPart = spreadsheetDocument.WorkbookPart;
    var sharedStringTable = workbookPart.SharedStringTablePart.SharedStringTable;
    var worksheetPart = workbookPart.WorksheetParts.First();
    var sheetData = worksheetPart.Worksheet.Elements<SheetData>().First();
    string text;
    foreach (Row r in sheetData.Elements<Row>())
    {
      foreach (Cell c in r.Elements<Cell>())
      {
        if (c.CellValue != null)
        {
          text = c.CellValue.Text;
          if (c.DataType != null)
          {
            if (c.DataType.Value == CellValues.SharedString)
            {
              int tableIndex = int.Parse(text);
              text = sharedStringTable.ChildElements[tableIndex].InnerText;
            }
            // note: the date cells do not have c.DataType.Value == CellValues.Date
            // Their c.DataType is null, if they are OLE Automation date numbers
          }
          // So here I am, and I'd need to know if the number supposed to be an
          // OLE Automation date or a number, so I can transform it if needed.
          //if (it's a date) // <- ?????
          //{
          //    double dateDouble = double.Parse(text);
          //    DateTime dateTime = DateTime.FromOADate(dateDouble);
          //    text = dateTime.ToShortDateString();
          //}
          Console.Write(text + " ");
        }
        else
        {
          Console.Write("NULL" + " ");
        }
      }
      Console.WriteLine();
    }
    Console.WriteLine();
    Console.ReadKey();

回答1:


I just came across similar issue and it is not easy to check whether a cell contains date/time value, see Using cell format to determine a cell contains date/time value, but the issue doesn't end with built-in number formats, I needed to handle custom formats too. There are no utilities in OpenXML SDK 2.5 to help, so I had to write my own (doesn't support Thai date/time formats).

public class ExcelHelper
{
    static uint[] builtInDateTimeNumberFormatIDs = new uint[] { 14, 15, 16, 17, 18, 19, 20, 21, 22, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57, 58 };
    static Dictionary<uint, NumberingFormat> builtInDateTimeNumberFormats = builtInDateTimeNumberFormatIDs.ToDictionary(id => id, id => new NumberingFormat { NumberFormatId = id });
    static Regex dateTimeFormatRegex = new Regex(@"((?=([^[]*\[[^[\]]*\])*([^[]*[ymdhs]+[^\]]*))|.*\[(h|mm|ss)\].*)", RegexOptions.Compiled);

    public static Dictionary<uint, NumberingFormat> GetDateTimeCellFormats(WorkbookPart workbookPart)
    {
        var dateNumberFormats = workbookPart.WorkbookStylesPart.Stylesheet.NumberingFormats
            .Descendants<NumberingFormat>()
            .Where(nf => dateTimeFormatRegex.Match(nf.FormatCode.Value).Success)
            .ToDictionary(nf => nf.NumberFormatId.Value);

        var cellFormats = workbookPart.WorkbookStylesPart.Stylesheet.CellFormats
            .Descendants<CellFormat>();

        var dateCellFormats = new Dictionary<uint, NumberingFormat>();
        uint styleIndex = 0;
        foreach (var cellFormat in cellFormats)
        {
            if (cellFormat.ApplyNumberFormat != null && cellFormat.ApplyNumberFormat.Value)
            {
                if (dateNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                {
                    dateCellFormats.Add(styleIndex, dateNumberFormats[cellFormat.NumberFormatId.Value]);
                }
                else if (builtInDateTimeNumberFormats.ContainsKey(cellFormat.NumberFormatId.Value))
                {
                    dateCellFormats.Add(styleIndex, builtInDateTimeNumberFormats[cellFormat.NumberFormatId.Value]);
                }
            }

            styleIndex++;
        }

        return dateCellFormats;
    }

    // Usage Example
    public static bool IsDateTimeCell(WorkbookPart workbookPart, Cell cell)
    {
        if (cell.StyleIndex == null)
            return false;

        var dateTimeCellFormats = ExcelHelper.GetDateTimeCellFormats(workbookPart);

        return dateTimeCellFormats.ContainsKey(cell.StyleIndex);
    }
}


来源:https://stackoverflow.com/questions/19034805/how-to-distinguish-inline-numbers-from-ole-automation-date-numbers-in-openxml-sp

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!