I have a requirement where a user will upload a CSV file in the format below, containing around 1.8 to 2 million records:
SITE_ID,HOUSE,STREET,CITY
Here is one efficient method:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Text.RegularExpressions;
using System.IO;
namespace ConsoleApplication23
{
/// <summary>
/// Console entry point: loads a CSV file into a DataSet and asks
/// RegexCompare for the rows of its first table that fail validation.
/// </summary>
class Program
{
    // Location of the CSV file that gets loaded and checked.
    const string FILENAME = @"c:\temp\test.csv";

    /// <summary>
    /// Reads the CSV file and collects the rows reported as errors.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        // NOTE(review): the second argument is presumably a "first row is a
        // header" flag - confirm against CSVReader.ReadCSVFile.
        var reader = new CSVReader();
        DataSet loaded = reader.ReadCSVFile(FILENAME, true);

        var validator = new RegexCompare();
        DataTable firstTable = loaded.Tables[0];

        // The result is only computed here; nothing consumes it yet.
        DataTable errors = validator.Get_Error_Rows(firstTable);
    }
}
class RegexCompare
{
// Validation rules keyed by CSV column name. Each entry repeats the column
// name and supplies the regular-expression text to test that column with.
// The type arguments are spelled out because a bare "Dictionary" does not
// exist in System.Collections.Generic and would not compile.
// NOTE(review): positveNegative appears to choose whether a pattern match
// means "valid" (true) or "invalid" (false), and required whether the column
// must be populated - confirm against Get_Error_Rows.
public static Dictionary<string, RegexCompare> dict = new Dictionary<string, RegexCompare>() {
    { "SITE_ID", new RegexCompare() { columnName = "SITE_ID", pattern = @"[^\d]+", positveNegative = false, required = true}},
    { "HOUSE", new RegexCompare() { columnName = "HOUSE", pattern = @"[^\d]+", positveNegative = false, required = true}},
    { "STREET", new RegexCompare() { columnName = "STREET", pattern = @"[A-Za-z0-9 ]+", positveNegative = true, required = true}},
    { "CITY", new RegexCompare() { columnName = "CITY", pattern = @"[A-Za-z ]+", positveNegative = true, required = true}},
    { "STATE", new RegexCompare() { columnName = "STATE", pattern = @"[A-Za-z]{2}", positveNegative = true, required = true}},
    { "ZIP", new RegexCompare() { columnName = "ZIP", pattern = @"\d{5}", positveNegative = true, required = true}},
    { "APARTMENT", new RegexCompare() { columnName = "APARTMENT", pattern = @"\d*", positveNegative = true, required = false}},
};
// Name of the CSV column this rule belongs to; the initializers in dict set
// it to the same text as the dictionary key.
string columnName { get; set;}
// Regular-expression text for the column (e.g. @"\d{5}" for ZIP in dict).
string pattern { get; set; }
// NOTE(review): presumably true when a pattern match marks the value as
// valid and false when a match marks it as an error - confirm in
// Get_Error_Rows.
Boolean positveNegative { get; set; }
// NOTE(review): presumably true when the column must contain a value -
// confirm in Get_Error_Rows.
Boolean required { get; set; }
public DataTable Get_Error_Rows(DataTable dt)
{
DataTable dtError = null;
foreach (DataRow row in dt.AsEnumerable())
{
Boolean error = false;
foreach (DataColumn col in dt.Columns)
{
RegexCompare regexCompare = dict[col.ColumnName];
object colValue = row.Field