I am using the accepted solution here to convert an Excel sheet into a DataTable. This works fine if I have "perfect" data, but if I have a blank cell in the middle of my data, that cell is skipped and the values that follow end up in the wrong columns.
Here's a slightly modified version of Waylon's answer which also relied on other answers. It encapsulates his method in a class.
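I changed the method signature from
`IEnumerator GetEnumerator()`
to
`IEnumerable GetRowCells(Row row)`
so that it takes the row as an argument instead of enumerating a wrapper object.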
Here's the class. You don't need to instantiate it; it just serves as a utility class:
/// <summary>
/// Utility methods for reading the cells of a spreadsheet row without losing
/// column alignment when the row has gaps. All members are static; the class
/// does not need to be instantiated.
/// </summary>
public class SpreedsheetHelper
{
    // Matches the first run of letters in a cell reference such as "AB12".
    // Built once instead of on every call.
    private static readonly System.Text.RegularExpressions.Regex ColumnNamePattern =
        new System.Text.RegularExpressions.Regex("[A-Za-z]+");

    /// <summary>
    /// Enumerates the cells of <paramref name="row"/> in order, yielding a new
    /// empty cell for every column position that the row skips.
    /// </summary>
    /// <param name="row">The row whose cells are enumerated.</param>
    /// <returns>
    /// The row's cells, with an empty <c>Cell</c> inserted wherever the column
    /// index derived from a cell's reference is ahead of the running position.
    /// </returns>
    public static System.Collections.Generic.IEnumerable<DocumentFormat.OpenXml.Spreadsheet.Cell> GetRowCells(Row row)
    {
        int currentCount = 0;
        // Restrict the traversal to Cell elements; an untyped Descendants()
        // also returns the cells' children, which cannot be cast to Cell.
        foreach (DocumentFormat.OpenXml.Spreadsheet.Cell cell in
            row.Descendants<DocumentFormat.OpenXml.Spreadsheet.Cell>())
        {
            string cellReference = cell.CellReference;

            // A cell that carries no reference cannot be mapped to a column;
            // it is treated as occupying the next position instead of failing.
            if (!string.IsNullOrEmpty(cellReference))
            {
                string columnName = GetColumnName(cellReference);
                int currentColumnIndex = ConvertColumnNameToNumber(columnName);

                // Pad with empty cells until the running position catches up
                // with the column this cell actually belongs to.
                for (; currentCount < currentColumnIndex; currentCount++)
                {
                    yield return new DocumentFormat.OpenXml.Spreadsheet.Cell();
                }
            }

            yield return cell;
            currentCount++;
        }
    }

    /// <summary>
    /// Given a cell name, parses the specified cell to get the column name.
    /// </summary>
    /// <param name="cellReference">Address of the cell (ie. B2)</param>
    /// <returns>
    /// Column Name (ie. B), or an empty string when the reference contains no letters.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// thrown if <paramref name="cellReference"/> is null
    /// </exception>
    public static string GetColumnName(string cellReference)
    {
        if (cellReference == null)
        {
            throw new ArgumentNullException("cellReference");
        }

        // Match the column name portion of the cell name.
        return ColumnNamePattern.Match(cellReference).Value;
    }

    /// <summary>
    /// Given just the column name (no row index),
    /// it will return the zero based column index.
    /// </summary>
    /// <param name="columnName">Column Name (ie. A or AB)</param>
    /// <returns>Zero based index if the conversion was successful</returns>
    /// <exception cref="ArgumentNullException">
    /// thrown if <paramref name="columnName"/> is null
    /// </exception>
    /// <exception cref="ArgumentException">
    /// thrown if the given string is empty or
    /// contains characters other than uppercase letters
    /// </exception>
    /// <exception cref="OverflowException">
    /// thrown if the column name is too long for the index to fit in an <see cref="int"/>
    /// </exception>
    public static int ConvertColumnNameToNumber(string columnName)
    {
        if (columnName == null)
        {
            throw new ArgumentNullException("columnName");
        }

        if (columnName.Length == 0)
        {
            throw new ArgumentException(
                "Column name must contain at least one uppercase letter.", "columnName");
        }

        // Column names are a bijective base-26 numeral: A..Z are the digits
        // 1..26, accumulated left to right. Integer arithmetic avoids the
        // floating-point rounding of Math.Pow.
        int oneBasedIndex = 0;
        foreach (char letter in columnName)
        {
            // Checked per character so that a trailing newline, which the
            // anchored pattern "^[A-Z]+$" would let through, is rejected too.
            if (letter < 'A' || letter > 'Z')
            {
                throw new ArgumentException(
                    "Column name may only contain the uppercase letters A-Z.", "columnName");
            }

            oneBasedIndex = checked(oneBasedIndex * 26 + (letter - 'A' + 1));
        }

        return oneBasedIndex - 1;
    }
}
Now you can get the cells of every row this way:
// Skipped here: the code that retrieves the worksheet's sheetData.
// Ask for Row elements explicitly; an untyped Descendants() call returns
// every descendant element, and the cast to Row below would fail on the
// first one that is not a row.
foreach (Row row in sheetData.Descendants<Row>())
{
    // GetRowCells fills the gaps, so the cells line up with their columns.
    var cells = SpreedsheetHelper.GetRowCells(row);
    foreach (Cell cell in cells)
    {
        // Skipped here: reading the text according to the cell type.
    }
}
The enumeration contains a cell for every column position up to the last populated one, including the positions that were blank in the sheet.