Excel to DataTable using EPPlus - excel locked for editing

ε祈祈猫儿з 提交于 2019-11-26 16:10:13
Tim Schmelter

I see, that's the code I posted recently here (now corrected). It can be improved, since the ExcelPackage and the FileStream (from File.OpenRead) are not disposed after use.

/// <summary>
/// Loads the first worksheet of the workbook at <paramref name="path"/> into a
/// <see cref="DataTable"/>, storing each cell as its formatted text.
/// </summary>
/// <param name="path">Path of the Excel file to read.</param>
/// <param name="hasHeader">
/// When true, row 1 supplies the column names and data starts at row 2;
/// otherwise columns are named "Column N" and data starts at row 1.
/// </param>
/// <returns>
/// The populated table, or an empty table when the worksheet contains no cells.
/// Cells that do not exist in the sheet are left as <see cref="DBNull"/>.
/// </returns>
/// <remarks>
/// Two header cells with the same text still make <c>DataTable.Columns.Add</c> throw.
/// </remarks>
public static DataTable GetDataTableFromExcel(string path, bool hasHeader = true)
{
    using (var pck = new OfficeOpenXml.ExcelPackage())
    {
        // Load through a short-lived stream so the file handle is released immediately.
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }

        var ws = pck.Workbook.Worksheets.First();
        DataTable tbl = new DataTable();

        // Dimension is null when the worksheet has no cells at all.
        if (ws.Dimension == null)
        {
            return tbl;
        }

        int lastColumn = ws.Dimension.End.Column;
        int lastRow = ws.Dimension.End.Row;

        // Create one column per sheet column by index. Enumerating the header range
        // would skip blank header cells and leave the table with too few columns,
        // which breaks the positional writes below.
        for (int colNum = 1; colNum <= lastColumn; colNum++)
        {
            string header = hasHeader ? ws.Cells[1, colNum].Text : null;
            tbl.Columns.Add(string.IsNullOrEmpty(header) ? string.Format("Column {0}", colNum) : header);
        }

        var startRow = hasHeader ? 2 : 1;
        for (int rowNum = startRow; rowNum <= lastRow; rowNum++)
        {
            DataRow row = tbl.Rows.Add();
            foreach (var cell in ws.Cells[rowNum, 1, rowNum, lastColumn])
            {
                // Sheet columns are 1-based, DataRow indexes are 0-based.
                row[cell.Start.Column - 1] = cell.Text;
            }
        }

        return tbl;
    }
}
Jafin

An extension-method version of Tim Schmelter's answer.

/// <summary>
/// Converts a worksheet into a <see cref="DataTable"/>, storing each cell as its formatted text.
/// </summary>
/// <param name="ws">Worksheet to convert.</param>
/// <param name="hasHeaderRow">
/// When true, row 1 supplies the column names and data starts at row 2;
/// otherwise columns are named "Column N" and data starts at row 1.
/// </param>
/// <returns>
/// The populated table, or an empty table when the worksheet contains no cells.
/// Cells that do not exist in the sheet are left as <see cref="DBNull"/>.
/// </returns>
public static DataTable ToDataTable(this ExcelWorksheet ws, bool hasHeaderRow = true)
{
    var tbl = new DataTable();

    // Dimension is null when the worksheet has no cells at all.
    if (ws.Dimension == null)
    {
        return tbl;
    }

    var lastColumn = ws.Dimension.End.Column;
    var lastRow = ws.Dimension.End.Row;

    // Build columns by index so blank header cells still get a column; enumerating
    // the header range would skip them and misalign the positional writes below.
    for (var colNum = 1; colNum <= lastColumn; colNum++)
    {
        var header = hasHeaderRow ? ws.Cells[1, colNum].Text : null;
        tbl.Columns.Add(string.IsNullOrEmpty(header) ? string.Format("Column {0}", colNum) : header);
    }

    var startRow = hasHeaderRow ? 2 : 1;
    for (var rowNum = startRow; rowNum <= lastRow; rowNum++)
    {
        var row = tbl.NewRow();
        foreach (var cell in ws.Cells[rowNum, 1, rowNum, lastColumn])
        {
            // Sheet columns are 1-based, DataRow indexes are 0-based.
            row[cell.Start.Column - 1] = cell.Text;
        }
        tbl.Rows.Add(row);
    }

    return tbl;
}

I've created a method that converts an Excel file to a DataTable using EPPlus and tries to maintain type safety. Duplicate column names are also handled, and a boolean tells the method whether the sheet has a header row. I created it for a complex import process that has several steps after uploading and requires user input before committing to the database.

/// <summary>
/// Converts an in-memory Excel document into a typed <see cref="DataTable"/>.
/// Column types are guessed from the first data cell of each column; cell values that
/// cannot be stored in the guessed type are reported in <c>Label1.Text</c> and left empty.
/// </summary>
/// <param name="excelDocumentAsBytes">Raw bytes of the Excel document.</param>
/// <param name="hasHeaderRow">
/// True when the first used row holds column names; the type is then taken from the row below it.
/// </param>
/// <returns>The populated table, or an empty table when the worksheet has no cells.</returns>
private DataTable ExcelToDataTable(byte[] excelDocumentAsBytes, bool hasHeaderRow)
{
    DataTable dt = new DataTable();
    string errorMessages = "";

    //create a new Excel package in a memorystream
    using (MemoryStream stream = new MemoryStream(excelDocumentAsBytes))
    using (ExcelPackage excelPackage = new ExcelPackage(stream))
    {
        // NOTE(review): index 1 is the first sheet only when the worksheet collection is 1-based - confirm for the EPPlus version in use
        ExcelWorksheet worksheet = excelPackage.Workbook.Worksheets[1];

        //check if the worksheet is completely empty
        if (worksheet.Dimension == null)
        {
            return dt;
        }

        // The used range need not begin at A1, so every index below is taken relative
        // to the start of the range instead of assuming row 1 / column 1.
        int firstRow = worksheet.Dimension.Start.Row;
        int firstColumn = worksheet.Dimension.Start.Column;
        int lastRow = worksheet.Dimension.End.Row;
        int lastColumn = worksheet.Dimension.End.Column;

        //add the columns to the datatable
        for (int j = firstColumn; j <= lastColumn; j++)
        {
            string columnName = "Column " + j;
            Type columnType = typeof(String);
            var excelCell = worksheet.Cells[firstRow, j].Value;

            if (excelCell != null)
            {
                var excelCellDataType = excelCell;

                //if there is a headerrow, use the cell below it for the datatype and the header for the column name
                if (hasHeaderRow == true)
                {
                    excelCellDataType = worksheet.Cells[firstRow + 1, j].Value;

                    columnName = excelCell.ToString();

                    //check if the column name already exists in the datatable, if so make a unique name
                    if (dt.Columns.Contains(columnName) == true)
                    {
                        columnName = columnName + "_" + j;
                    }
                }

                //try to determine the datatype for the column from the sampled cell
                if (excelCellDataType is DateTime)
                {
                    columnType = typeof(DateTime);
                }
                else if (excelCellDataType is Boolean)
                {
                    columnType = typeof(Boolean);
                }
                else if (excelCellDataType is Double)
                {
                    //determine if the value is a decimal or int by looking for a decimal separator
                    //not the cleanest of solutions but it works since excel always gives a double
                    string numberText = excelCellDataType.ToString();
                    columnType = numberText.Contains(".") || numberText.Contains(",")
                        ? typeof(Decimal)
                        : typeof(Int64);
                }
            }

            dt.Columns.Add(columnName, columnType);
        }

        //start adding data the datatable here by looping all rows and columns
        for (int i = firstRow + Convert.ToInt32(hasHeaderRow); i <= lastRow; i++)
        {
            //create a new datatable row
            DataRow row = dt.NewRow();

            //loop all columns
            for (int j = firstColumn; j <= lastColumn; j++)
            {
                var excelCell = worksheet.Cells[i, j].Value;

                // empty cells stay DBNull
                if (excelCell == null)
                {
                    continue;
                }

                // table column index is the offset from the first used sheet column
                int columnIndex = j - firstColumn;

                try
                {
                    row[columnIndex] = excelCell;
                }
                catch
                {
                    // best effort: record the cell that does not fit the guessed column type and carry on
                    errorMessages += "Row " + (i - 1) + ", Column " + j + ". Invalid " + dt.Columns[columnIndex].DataType.ToString().Replace("System.", "") + " value:  " + excelCell.ToString() + "<br>";
                }
            }

            //add the new row to the datatable
            dt.Rows.Add(row);
        }
    }

    //show error messages if needed
    Label1.Text = errorMessages;

    return dt;
}

The webforms button click for demo purposes.

/// <summary>
/// Demo click handler: converts the uploaded file to a DataTable and binds it to GridView1.
/// </summary>
protected void Button1_Click(object sender, EventArgs e)
{
    // Nothing to do when no file was posted.
    if (!FileUpload1.HasFile)
    {
        return;
    }

    GridView1.DataSource = ExcelToDataTable(FileUpload1.FileBytes, CheckBox1.Checked);
    GridView1.DataBind();
}

This is an improvement on the generic one above. Use it if you have a class with properties such as "Name", "Surname", "Telephone" and "Fax", and an Excel sheet whose first row contains the same names: it loads each Excel row into an object of that class and adds it to a List.

/// <summary>
/// Reads rows of the first worksheet into objects of type <typeparamref name="T"/>,
/// matching the texts in sheet row 1 against the public property names of T.
/// </summary>
/// <typeparam name="T">Type to create for each row; needs a parameterless constructor.</typeparam>
/// <param name="path">Path of the Excel file to read.</param>
/// <param name="fromRow">First sheet row to convert. Row 1 holds the names, so data normally starts at 2.</param>
/// <param name="fromColumn">Only used to validate <paramref name="toColumn"/>.</param>
/// <param name="toRow">Last sheet row to convert; 0 means the last used row.</param>
/// <param name="toColumn">Last header column to inspect; 0 means the number of properties of T.</param>
/// <returns>One object per row; empty when the worksheet has no cells.</returns>
/// <exception cref="ArgumentException">toColumn or toRow lies before its from-counterpart.</exception>
public static List<T> GetClassFromExcel<T>(string path, int fromRow, int fromColumn, int toRow = 0, int toColumn = 0)
{
    if (toColumn != 0 && toColumn < fromColumn) throw new ArgumentException("toColumn can not be less than fromColumn");
    if (toRow != 0 && toRow < fromRow) throw new ArgumentException("toRow can not be less than fromRow");

    List<T> retList = new List<T>();
    using (var pck = new ExcelPackage())
    {
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }

        //Retrieve first Worksheet
        var ws = pck.Workbook.Worksheets.First();

        // Dimension is null when the worksheet has no cells at all.
        if (ws.Dimension == null)
        {
            return retList;
        }

        //Get all the properties associated with T
        PropertyInfo[] myProp = typeof(T).GetProperties();

        //If the to column is empty or 0, then make the tocolumn to the count of the properties
        toColumn = toColumn == 0 ? myProp.Length : toColumn;

        //Read the first row for the column names and remember the sheet column of each name.
        //The real column is stored (not a running counter) because enumerating a range
        //skips blank cells, which would shift every later position.
        Dictionary<string, int> columnNames = new Dictionary<string, int>();
        foreach (var cell in ws.Cells[1, 1, 1, toColumn == 0 ? ws.Dimension.End.Column : toColumn])
        {
            if (cell.Value == null)
            {
                continue;
            }
            columnNames.Add(cell.Value.ToString(), cell.Start.Column);
        }

        int lastRow = toRow == 0 ? ws.Dimension.End.Row : toRow;

        //Loop through the rows of the excel sheet
        for (var rowNum = fromRow; rowNum <= lastRow; rowNum++)
        {
            // A fresh instance per row; reusing one instance would fill the list with the
            // same object. Held as object so value-type T is mutated in its box.
            object rowObject = Activator.CreateInstance<T>();

            foreach (var propertyInfo in myProp)
            {
                int column;
                if (!columnNames.TryGetValue(propertyInfo.Name, out column))
                {
                    continue;
                }

                object cellValue = ws.Cells[rowNum, column].Value;
                if (cellValue == null)
                {
                    // empty cell: keep the property's default value
                    continue;
                }

                //Convert to the property type (unwrapping Nullable<>) to prevent a cast exception.
                Type targetType = Nullable.GetUnderlyingType(propertyInfo.PropertyType) ?? propertyInfo.PropertyType;
                propertyInfo.SetValue(rowObject, Convert.ChangeType(cellValue, targetType));
            }

            retList.Add((T)rowObject);
        }
    }
    return retList;
}

Now you can use the list as a data-binding source if you need to... A gift from me to you... :) Daniel C. Vrey

Updated it so that toColumn works, added toRow, and followed Andreas's suggestions. Thumbs up for Andreas.

/// <summary>
/// Reads rows of the first worksheet into objects of type <typeparamref name="T"/> by position:
/// the i-th property returned by reflection receives the text of column fromColumn + i.
/// </summary>
/// <typeparam name="T">Reference type with a parameterless constructor and text-assignable properties.</typeparam>
/// <param name="path">Path of the Excel file to read.</param>
/// <param name="fromRow">First sheet row to convert.</param>
/// <param name="fromColumn">Sheet column mapped to the first property.</param>
/// <param name="toColumn">Last sheet column to read; 0 means one column per property.</param>
/// <returns>One object per row; empty when the worksheet has no cells.</returns>
/// <remarks>
/// NOTE(review): the mapping relies on the order of <c>GetProperties()</c> matching the column order,
/// and on each mapped property accepting a string - confirm both for the target type.
/// </remarks>
public static List<T> getClassFromExcel<T>(string path, int fromRow, int fromColumn, int toColumn = 0) where T : class
{
    using (var pck = new OfficeOpenXml.ExcelPackage())
    {
        List<T> retList = new List<T>();

        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }
        var ws = pck.Workbook.Worksheets.First();

        // Dimension is null when the worksheet has no cells at all.
        if (ws.Dimension == null)
        {
            return retList;
        }

        // Reflection results do not change between rows, so look them up once.
        PropertyInfo[] myProp = typeof(T).GetProperties();

        // With toColumn omitted every property is filled; an explicit toColumn caps
        // how many columns (and therefore properties) are read.
        int lastColumn = toColumn == 0 ? fromColumn + myProp.Length - 1 : toColumn;
        int mappedCount = Math.Min(myProp.Length, lastColumn - fromColumn + 1);
        int lastRow = ws.Dimension.End.Row;

        for (var rowNum = fromRow; rowNum <= lastRow; rowNum++)
        {
            T objT = Activator.CreateInstance<T>();

            for (int i = 0; i < mappedCount; i++)
            {
                myProp[i].SetValue(objT, ws.Cells[rowNum, fromColumn + i].Text);
            }
            retList.Add(objT);
        }
        return retList;
    }
}
/// <summary>
/// Reads rows of the first worksheet into objects of type <typeparamref name="T"/>.
/// The texts in sheet row 1 are matched against each property's
/// <see cref="DisplayNameAttribute"/> value, or the property name when no display name is set.
/// </summary>
/// <typeparam name="T">Class with a public parameterless constructor.</typeparam>
/// <param name="path">Path of the Excel file to read.</param>
/// <param name="fromRow">Header row; conversion starts at the row after it.</param>
/// <param name="fromColumn">Only used to validate <paramref name="toColumn"/>.</param>
/// <param name="toRow">Last sheet row to convert; 0 means the last used row.</param>
/// <param name="toColumn">Last header column to inspect; 0 means the number of properties of T.</param>
/// <returns>One object per data row; empty when the worksheet has no cells.</returns>
/// <exception cref="ArgumentException">toColumn or toRow lies before its from-counterpart.</exception>
public static List<T> GetClassFromExcel<T>(string path, int fromRow, int fromColumn, int toRow = 0, int toColumn = 0) where T: class, new()
{
    if (toColumn != 0 && toColumn < fromColumn) throw new ArgumentException("toColumn can not be less than fromColumn");
    if (toRow != 0 && toRow < fromRow) throw new ArgumentException("toRow can not be less than fromRow");

    List<T> retList = new List<T>();
    using (var pck = new ExcelPackage())
    {
        using (var stream = File.OpenRead(path))
        {
            pck.Load(stream);
        }

        //Retrieve first Worksheet
        var ws = pck.Workbook.Worksheets.First();

        // Dimension is null when the worksheet has no cells at all.
        if (ws.Dimension == null)
        {
            return retList;
        }

        //Get all the properties associated with T
        PropertyInfo[] myProp = typeof(T).GetProperties();

        //If the to column is empty or 0, then make the tocolumn to the count of the properties
        toColumn = toColumn == 0 ? myProp.Length : toColumn;

        //Read the first row for the column names and remember the sheet column of each name.
        //The real column is stored (not a running counter) because enumerating a range
        //skips blank cells, which would shift every later position.
        Dictionary<string, int> columnNames = new Dictionary<string, int>();
        foreach (var cell in ws.Cells[1, 1, 1, toColumn == 0 ? ws.Dimension.End.Column : toColumn])
        {
            if (cell.Value == null)
            {
                continue;
            }
            columnNames.Add(cell.Value.ToString(), cell.Start.Column);
        }

        //Resolve once which property is fed from which sheet column; this does not vary per row.
        var mappedProperties = new List<KeyValuePair<PropertyInfo, int>>();
        foreach (var propertyInfo in myProp)
        {
            var attribute = propertyInfo.GetCustomAttributes(typeof(DisplayNameAttribute), true).Cast<DisplayNameAttribute>().SingleOrDefault();
            // If DisplayName annotation not used then get property name itself
            string displayName = attribute != null && !string.IsNullOrEmpty(attribute.DisplayName) ? attribute.DisplayName : propertyInfo.Name;

            int column;
            if (columnNames.TryGetValue(displayName, out column))
            {
                mappedProperties.Add(new KeyValuePair<PropertyInfo, int>(propertyInfo, column));
            }
        }

        int lastRow = toRow == 0 ? ws.Dimension.End.Row : toRow;

        //Loop through the rows of the excel sheet; fromRow + 1 to read from next row after columnheader
        for (var rowNum = fromRow + 1; rowNum <= lastRow; rowNum++)
        {
            T objT = new T();

            foreach (var mapping in mappedProperties)
            {
                object cellValue = ws.Cells[rowNum, mapping.Value].Value;
                if (cellValue == null)
                {
                    // empty cell: keep the property's default value
                    continue;
                }

                //Convert to the property type (unwrapping Nullable<>) to prevent a cast exception.
                Type targetType = Nullable.GetUnderlyingType(mapping.Key.PropertyType) ?? mapping.Key.PropertyType;
                mapping.Key.SetValue(objT, Convert.ChangeType(cellValue, targetType));
            }

            retList.Add(objT);
        }
    }
    return retList;
}
// Usage: with the method above placed in a separate Helpers.cs file,
// it is consumed like this.
List<CustomerExcelModel> records = Helpers.GetClassFromExcel<CustomerExcelModel>(filelocation, fromRow: 1, fromColumn: 1);

Thanks a lot to the user who submitted the code, and to Andreas for the suggestion. The following changes were made. I am new to generics, so please forgive and correct any mistakes; the modified code is shown with this answer and might help someone.

  • Added the DisplayName annotation on the entity model to map to the Excel column name, so that column names containing spaces can also be handled.
  • "T objT" was declared outside the for loop, which caused the same value to be inserted into the List repeatedly; fixed by
    instantiating it inside the loop, i.e. using "new T()".
  • Fixed the column out-of-range error by using "ws.Dimension.Columns" to get the column count instead of ws.Cells.Count(), which caused the error.
  • When looping through the row data, added +1 to the start row, because rowNum = 1 was also reading the header names; hence the minor change to "rowNum = fromRow + 1".

Here I am sharing how you can read the Excel file. You can modify it to store each value in a DataTable.

/// <summary>
/// Writes every cell of the workbook's first worksheet to the console, one line per cell.
/// Empty cells are printed with an empty value.
/// </summary>
/// <param name="FilePath">Path of the Excel file to read.</param>
public void readXLS(string FilePath)
{
    FileInfo existingFile = new FileInfo(FilePath);
    using (ExcelPackage package = new ExcelPackage(existingFile))
    {
        //get the first worksheet in the workbook
        // NOTE(review): index 1 is the first sheet only when the worksheet collection is 1-based - confirm for the EPPlus version in use
        ExcelWorksheet worksheet = package.Workbook.Worksheets[1];

        // Dimension is null when the worksheet has no cells at all.
        if (worksheet.Dimension == null)
        {
            return;
        }

        int colCount = worksheet.Dimension.End.Column;  //get Column Count
        int rowCount = worksheet.Dimension.End.Row;     //get row count
        for (int row = 1; row <= rowCount; row++)
        {
            for (int col = 1; col <= colCount; col++)
            {
                // Value is null for an empty cell, so guard before calling ToString().
                object cellValue = worksheet.Cells[row, col].Value;
                string text = cellValue == null ? string.Empty : cellValue.ToString().Trim();

                //You can update code here to add each cell value to DataTable.
                Console.WriteLine(" Row:" + row + " column:" + col + " Value:" + text);
            }
        }
    }
}

Ref: http://sforsuresh.in/read-data-excel-sheet-insert-database-table-c/

VDWWD's answer above works great to keep type safety, and I built upon it with some improvements.

  • Method reads from a file directly.
  • Column type detection by using all rows and not just one value. Column type is set to String if more than one type is found in the column.
  • Error Messages returned in a list of strings.

Here is the updated version:

    /// <summary>
    /// Reads an Excel file into a typed <see cref="DataTable"/>. A column's type is derived from
    /// all of its data cells: a single common type is used, anything mixed becomes String.
    /// </summary>
    /// <param name="path">Path of the Excel file to read.</param>
    /// <param name="errorList">
    /// Replaced with a new list on entry; receives one message per cell value that could not be
    /// stored in its column's type.
    /// </param>
    /// <param name="hasHeaderRow">True when the first used row holds the column names.</param>
    /// <returns>The populated table, or an empty table when the worksheet has no cells.</returns>
    /// <remarks>
    /// A column whose first-row cell is empty is always created as String.
    /// </remarks>
    public static DataTable ExcelToDataTable(string path, ref List<string> errorList, bool hasHeaderRow = true)
    {
        DataTable dt = new DataTable();
        errorList = new List<string>();

        //create a new Excel package
        using (ExcelPackage excelPackage = new ExcelPackage())
        {
            using (var stream = File.OpenRead(path))
            {
                excelPackage.Load(stream);
            }

            // NOTE(review): index 1 is the first sheet only when the worksheet collection is 1-based - confirm for the EPPlus version in use
            ExcelWorksheet worksheet = excelPackage.Workbook.Worksheets[1];

            //check if the worksheet is completely empty
            if (worksheet.Dimension == null)
            {
                return dt;
            }

            // The used range need not begin at A1, so every index below is taken relative
            // to the start of the range instead of assuming row 1 / column 1.
            int firstRow = worksheet.Dimension.Start.Row;
            int firstColumn = worksheet.Dimension.Start.Column;
            int lastRow = worksheet.Dimension.End.Row;
            int lastColumn = worksheet.Dimension.End.Column;
            int firstDataRow = firstRow + Convert.ToInt32(hasHeaderRow);

            //add the columns to the datatable
            for (int j = firstColumn; j <= lastColumn; j++)
            {
                string columnName = "Column " + j;

                //Collect the types of all non-empty data cells in this column, and note whether
                //any numeric value has a fractional part or is too large for an Int64.
                var columnTypes = new HashSet<Type>();
                bool needsDecimal = false;
                for (int i = firstDataRow; i <= lastRow; i++)
                {
                    object value = worksheet.Cells[i, j].Value;
                    if (value == null)
                    {
                        continue;
                    }

                    columnTypes.Add(value.GetType());

                    if (!needsDecimal && value is double)
                    {
                        double number = (double)value;
                        needsDecimal = number != Math.Floor(number) || Math.Abs(number) >= (double)long.MaxValue;
                    }
                }

                Type columnType = typeof(String);
                var excelCell = worksheet.Cells[firstRow, j].Value;

                if (excelCell != null)
                {
                    //if there is a headerrow, take the column name from it
                    if (hasHeaderRow == true)
                    {
                        columnName = excelCell.ToString();

                        //check if the column name already exists in the datatable, if so make a unique name
                        if (dt.Columns.Contains(columnName) == true)
                        {
                            columnName = columnName + "_" + j;
                        }
                    }

                    //Exactly one type in the column: use it. None or several: fall back to String.
                    Type detectedType = columnTypes.Count == 1 ? columnTypes.First() : typeof(string);

                    if (detectedType == typeof(DateTime))
                    {
                        columnType = typeof(DateTime);
                    }
                    else if (detectedType == typeof(Boolean))
                    {
                        columnType = typeof(Boolean);
                    }
                    else if (detectedType == typeof(Double))
                    {
                        //Excel hands every number over as a double; decide Decimal vs Int64 from the
                        //values themselves. (Testing the Type's name for "." is always true.)
                        columnType = needsDecimal ? typeof(Decimal) : typeof(Int64);
                    }
                }

                dt.Columns.Add(columnName, columnType);
            }

            //start adding data the datatable here by looping all rows and columns
            for (int i = firstDataRow; i <= lastRow; i++)
            {
                //create a new datatable row
                DataRow row = dt.NewRow();

                //loop all columns
                for (int j = firstColumn; j <= lastColumn; j++)
                {
                    var excelCell = worksheet.Cells[i, j].Value;

                    // empty cells stay DBNull
                    if (excelCell == null)
                    {
                        continue;
                    }

                    // table column index is the offset from the first used sheet column
                    int columnIndex = j - firstColumn;

                    try
                    {
                        row[columnIndex] = excelCell;
                    }
                    catch
                    {
                        // best effort: record the cell that does not fit the column type and carry on
                        errorList.Add("Row " + (i - 1) + ", Column " + j + ". Invalid " + dt.Columns[columnIndex].DataType.ToString().Replace("System.", "") + " value:  " + excelCell.ToString() );
                    }
                }

                //add the new row to the datatable
                dt.Rows.Add(row);
            }
        }

        return dt;
    }
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!