C# OPEN XML: empty cells are getting skipped while getting data from EXCEL to DATATABLE

天大地大妈咪最大 提交于 2019-11-30 17:41:01

Had there been data in all the cells of a row then everything works good. The moment you have even a single empty cell in a row then things go haywire.

Why it is happening in first place?

This is because in the below code:

row.Descendants<Cell>().Count()

The Count() is the number of non-empty populated cells (not all columns). So, when you pass row.Descendants<Cell>().ElementAt(i) as an argument to GetCellValue method:

GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));

Then it will find the content of the next non-empty populated cell (not necessarily what is in that column index, i) e.g. if the first column is empty and we call ElementAt(1), it returns the value in the second column instead and the whole logic gets messed up.

Solution - We need to deal with the occurrence of empty cells: Essentially we need to figure out the original column index of the cell in case there were empty cells before it. So, you need to substitute your for loop code as below:

for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
      tempRow[i] = GetCellValue(spreadSheetDocument, row.Descendants<Cell>().ElementAt(i));
}

with

for (int i = 0; i < row.Descendants<Cell>().Count(); i++)
{
    Cell cell = row.Descendants<Cell>().ElementAt(i);
    int actualCellIndex = CellReferenceToIndex(cell);
    tempRow[actualCellIndex] = GetCellValue(spreadSheetDocument, cell);
}

and add below method in your code which is used in the above modified code snippet to obtain the original/correct column index of any cell:

private static int CellReferenceToIndex(Cell cell)
{
    int index = 0;
    string reference = cell.CellReference.ToString().ToUpper();
    foreach (char ch in reference)
    {
        if (Char.IsLetter(ch))
        {
            int value = (int)ch - (int)'A';
            index = (index == 0) ? value : ((index + 1) * 26) + value;
        }
        else
            return index;
    }
    return index;
}
user6330202
public void Read2007Xlsx()
        {
            try
            {
                DataTable dt = new DataTable();
                using (SpreadsheetDocument spreadSheetDocument = SpreadsheetDocument.Open(@"D:\File.xlsx", false))
                {
                    WorkbookPart workbookPart = spreadSheetDocument.WorkbookPart;
                    IEnumerable<Sheet> sheets = spreadSheetDocument.WorkbookPart.Workbook.GetFirstChild<Sheets>().Elements<Sheet>();
                    string relationshipId = sheets.First().Id.Value;
                    WorksheetPart worksheetPart = (WorksheetPart)spreadSheetDocument.WorkbookPart.GetPartById(relationshipId);
                    Worksheet workSheet = worksheetPart.Worksheet;
                    SheetData sheetData = workSheet.GetFirstChild<SheetData>();
                    IEnumerable<Row> rows = sheetData.Descendants<Row>();
                    foreach (Cell cell in rows.ElementAt(0))
                    {
                        dt.Columns.Add(GetCellValue(spreadSheetDocument, cell));
                    }
                    foreach (Row row in rows) //this will also include your header row...
                    {
                        DataRow tempRow = dt.NewRow();
                        int columnIndex = 0;
                        foreach (Cell cell in row.Descendants<Cell>())
                        {
                            // Gets the column index of the cell with data
                            int cellColumnIndex = (int)GetColumnIndexFromName(GetColumnName(cell.CellReference));
                            cellColumnIndex--; //zero based index
                            if (columnIndex < cellColumnIndex)
                            {
                                do
                                {
                                    tempRow[columnIndex] = ""; //Insert blank data here;
                                    columnIndex++;
                                }
                                while (columnIndex < cellColumnIndex);
                            }
                            tempRow[columnIndex] = GetCellValue(spreadSheetDocument, cell);

                            columnIndex++;
                        }
                        dt.Rows.Add(tempRow);
                    }
                }
                dt.Rows.RemoveAt(0); //...so i'm taking it out here.
            }
            catch (Exception ex)
            {
            }
        }
        /// <summary>
        /// Given a cell name, parses the specified cell to get the column name.
        /// </summary>
        /// <param name="cellReference">Address of the cell (ie. B2)</param>
        /// <returns>Column Name (ie. B)</returns>
        public static string GetColumnName(string cellReference)
        {
            // Create a regular expression to match the column name portion of the cell name.
            Regex regex = new Regex("[A-Za-z]+");
            Match match = regex.Match(cellReference);
            return match.Value;
        }
        /// <summary>
        /// Given just the column name (no row index), it will return the zero based column index.
        /// Note: This method will only handle columns with a length of up to two (ie. A to Z and AA to ZZ). 
        /// A length of three can be implemented when needed.
        /// </summary>
        /// <param name="columnName">Column Name (ie. A or AB)</param>
        /// <returns>Zero based index if the conversion was successful; otherwise null</returns>
        public static int? GetColumnIndexFromName(string columnName)
        {

            //return columnIndex;
            string name = columnName;
            int number = 0;
            int pow = 1;
            for (int i = name.Length - 1; i >= 0; i--)
            {
                number += (name[i] - 'A' + 1) * pow;
                pow *= 26;
            }
            return number;
        }
        public static string GetCellValue(SpreadsheetDocument document, Cell cell)
        {
            SharedStringTablePart stringTablePart = document.WorkbookPart.SharedStringTablePart;
            if (cell.CellValue ==null)
            {
            return "";
            }
            string value = cell.CellValue.InnerXml;
            if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
            {
                return stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
            }
            else
            {
                return value;
            }
        }

Try this code, i have done little modifications and it worked for me.

 public static DataTable Fill_dataTable(string filePath)
    {
        DataTable dt = new DataTable();

        using (SpreadsheetDocument doc = SpreadsheetDocument.Open(filePath, false))
        {
            Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>();
            Worksheet worksheet = doc.WorkbookPart.GetPartById(sheet.Id.Value) as WorksheetPart.Worksheet;
            IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>();
            DataTable dt = new DataTable();
            List<string> columnRef = new List<string>();
            foreach (Row row in rows)
            {
                if (row.RowIndex != null)
                {
                    if (row.RowIndex.Value == 1)
                    {
                        foreach (Cell cell in row.Descendants<Cell>())
                        {
                            dt.Columns.Add(GetValue(doc, cell));
                            columnRef.Add(cell.CellReference.ToString().Substring(0, cell.CellReference.ToString().Length - 1));
                        }
                    }
                    else
                    {
                        dt.Rows.Add();
                        int i = 0;
                        foreach (Cell cell in row.Descendants<Cell>())
                        {
                            while (columnRef(i) + dt.Rows.Count + 1 != cell.CellReference)
                            {
                                dt.Rows(dt.Rows.Count - 1)(i) = "";
                                i += 1;
                            }

                            dt.Rows(dt.Rows.Count - 1)(i) = GetValue(doc, cell);
                            i += 1;
                        }
                    }
                }
            }
        }

        return dt;
    }
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!