I am using the accepted solution here to convert an Excel sheet into a DataTable. This works fine if I have "perfect" data, but if I have a blank cell in the middle of my…
All good examples. Here is the one I am using since I need to keep track of all rows, cells, values, and titles for correlation and analysis.
The method ReadSpreadsheet opens an xlsx file and goes through each worksheet, row, and column. Since the values are stored in a referenced string table, I also explicitly use that per worksheet. There are other classes used: DSFunction and StaticVariables. The latter holds often-used parameter values, such as the referenced 'quotdouble' (quotdouble = "\u0022";) and 'crlf' (crlf = "\u000D" + "\u000A";).
The relevant DSFunction method GetIntColIndexForLetter is included below. It returns an integer value for the column index corresponding to letter names such as A, B, AA, ADE, etc. It is used together with the variable 'ncellcolref' to determine whether any columns have been skipped and to enter an empty string value for each one that is missing.
I also do some cleaning of the values before storing temporarily in a List object (using Replace method).
Subsequently, I use the hash table (Dictionary) of column names to extract values across different worksheets, correlate them, create normalized values, and then create an object used in our product which is then stored as an XML file. None of this is shown but is why this approach is used.
public static class DSFunction {
    /// <summary>
    /// Converts the leading column letters of a spreadsheet cell reference into a
    /// 1-based column index using bijective base-26 ("A" = 1, "Z" = 26, "AA" = 27, ...).
    /// </summary>
    /// <param name="lettstr">
    /// Column name as letters, optionally followed by a row number (e.g. "B" or "AB12").
    /// Letter case and surrounding whitespace are ignored.
    /// </param>
    /// <returns>
    /// The 1-based column index, or 0 when <paramref name="lettstr"/> is null, empty,
    /// or does not start with an ASCII letter.
    /// </returns>
    public static int GetIntColIndexForLetter(string lettstr) {
        if (string.IsNullOrEmpty(lettstr)) {
            return 0;
        }

        int result = 0;
        foreach (char raw in lettstr.Trim()) {
            char ch = raw;
            // Fold to upper case by hand so the result does not depend on the
            // current culture (e.g. Turkish 'I' / dotless 'i' casing rules).
            if (ch >= 'a' && ch <= 'z') {
                ch = (char)(ch - 'a' + 'A');
            }
            // The column part ends at the first non-letter (normally the row digits).
            if (ch < 'A' || ch > 'Z') {
                break;
            }
            // Each further letter shifts the value one base-26 "digit" to the left.
            result = result * 26 + (ch - 'A' + 1);
        }
        return result;
    }
}
public static class Extractor {
    /// <summary>
    /// Opens an .xlsx workbook and walks every worksheet, row and cell.
    /// When a header is expected, the first row of each worksheet is treated as the header:
    /// its normalized column names are recorded in a name-to-index dictionary. For every
    /// other row the cleaned cell texts are collected into a per-row list, with an empty
    /// string inserted for each column skipped in the file.
    /// </summary>
    /// <remarks>
    /// The dictionary and the per-row list are local working state; this method itself only
    /// reports success or failure. Any further processing of the collected values is not
    /// part of this method.
    /// </remarks>
    /// <param name="fileUri">Path of the .xlsx file to read.</param>
    /// <returns>
    /// An empty string on success; otherwise "notok:" followed by the exception message.
    /// </returns>
    public static string ReadSpreadsheet(string fileUri) {
        string msg = "";
        bool haveheader = true;
        Dictionary<string, int> hashcolnames = new Dictionary<string, int>();
        List<string> colvalues = new List<string>();
        try {
            if (!File.Exists(fileUri)) {
                throw new FileNotFoundException("file does not exist");
            }
            // The document is only read, so open it non-editable.
            using (SpreadsheetDocument ssdoc = SpreadsheetDocument.Open(fileUri, false)) {
                WorkbookPart wbpart = ssdoc.WorkbookPart;
                SharedStringTablePart stringTable = wbpart.GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
                foreach (Sheet sht in wbpart.Workbook.Descendants<Sheet>()) {
                    // Skip sheets whose part is not a worksheet instead of failing the whole read.
                    WorksheetPart wspart = wbpart.GetPartById(sht.Id) as WorksheetPart;
                    if (wspart == null) {
                        continue;
                    }
                    int nrow = 0;
                    foreach (Row ssrow in wspart.Worksheet.Descendants<Row>()) {
                        nrow++;
                        bool isheader = (nrow == 1 && haveheader);
                        int prevcol = 0; // 1-based column index of the previous cell in this row
                        colvalues.Clear();
                        foreach (Cell sscell in ssrow.Elements<Cell>()) {
                            int col = DSFunction.GetIntColIndexForLetter(sscell.CellReference);
                            if (col <= 0) {
                                // No usable reference: treat the cell as directly following the previous one.
                                col = prevcol + 1;
                            }
                            // Fill in every column that the file skipped between the previous cell and this one.
                            for (int gap = prevcol + 1; gap < col; gap++) {
                                if (isheader) {
                                    string missing = "-missing" + gap.ToString() + "-";
                                    if (!hashcolnames.ContainsKey(missing)) {
                                        hashcolnames.Add(missing, gap - 1);
                                    }
                                }
                                else {
                                    colvalues.Add("");
                                }
                            }
                            prevcol = col;

                            // Shared-string cells hold an index into the shared string table;
                            // every other cell type is read through its inner text.
                            string txt;
                            if (sscell.DataType != null
                                && sscell.DataType.Value == CellValues.SharedString
                                && stringTable != null) {
                                txt = stringTable.SharedStringTable.ElementAt(int.Parse(sscell.InnerText)).InnerText;
                            }
                            else {
                                txt = sscell.InnerText;
                            }

                            if (isheader) {
                                // Normalize the header text: lower case, no spaces, known aliases unified.
                                string name = txt.ToLowerInvariant().Trim().Replace(" ", "");
                                if (name == "table/viewname") name = "tablename";
                                else if (name == "schemaownername") name = "schemaowner";
                                else if (name == "subjectareaname") name = "subjectarea";
                                else if (name.StartsWith("column")) {
                                    name = name.Substring("column".Length);
                                }
                                // Index by column position so it lines up with the gap-filled data rows.
                                if (!hashcolnames.ContainsKey(name)) {
                                    hashcolnames.Add(name, col - 1);
                                }
                            }
                            else {
                                txt = txt.Replace("\u201C", "'"); // left curly double quote
                                txt = txt.Replace("\u201D", "'"); // right curly double quote
                                txt = txt.Replace(StaticVariables.quotdouble, "'");
                                txt = txt.Replace(StaticVariables.crlf, " ");
                                // NOTE(review): as written this replaces a space with a space; it may
                                // originally have collapsed a double or non-breaking space - confirm.
                                txt = txt.Replace(" ", " ");
                                txt = txt.Replace("<", "");
                                txt = txt.Replace(">", "");
                                colvalues.Add(txt);
                            }
                        }
                    }
                }
            }
        }
        catch (Exception ex) {
            msg = "notok:" + ex.Message;
        }
        return msg;
    }
}
|