C# Filter List to remove any duplicate objects

╄→гoц情女王★ 提交于 2019-12-04 19:26:47

Due to limitations in EF, we can't join a DB query with an in-memory list. Also, Contains can only be used with a list of primitive values. So we need to do some extra work to find the duplicates on two columns.

// Load the candidate rows and collect the distinct key values they use.
var newItems = createListFromCSV();
var meterIds = newItems.Select(n => n.meterid).Distinct().ToList();
var dates = newItems.Select(n => n.date).Distinct().ToList();

// Ask the database only for the key columns of rows that could collide.
// A real duplicate must match on BOTH columns, so the two filters are
// combined with && to keep the fetched set as small as possible.
var probableMatches = (from ri in db.RemoteReadings
                       where meterIds.Contains(ri.meterid)
                          && dates.Contains(ri.date)
                       select new { ri.meterid, ri.date }).ToList();

// Join in memory on the composite key (meterid, date) to find the new
// items that already exist in the database.
var duplicates = (from existingRi in probableMatches
                  join newRi in newItems
                  on new { existingRi.meterid, existingRi.date }
                  equals new { newRi.meterid, newRi.date }
                  select newRi).ToList();

// Everything that is not a duplicate is safe to insert.
var insertList = newItems.Except(duplicates).ToList();

db.RemoteReadings.Insert(insertList); // or whatever

With the great help of aSharma and some other tweaks, I finally got a working and tested method. As my lists contain over 5000 items, I had to execute the query in batches to work around SQL Server's limit of 2100 parameters per RPC request. Added some comments and credits :)

/// <summary>
/// Returns the items of <paramref name="lst"/> whose (meterId, mydate) pair is not
/// already present in the RemoteReadings table.
/// </summary>
/// <param name="lst">Candidate RemoteReadings entities that are about to be inserted.</param>
/// <returns>
/// A new list holding the items of <paramref name="lst"/> for which no row with the same
/// (meterId, mydate) was found, or <c>null</c> when any exception is raised during the
/// lookup (the exception is written to the trace listeners before returning).
/// </returns>
        public List<RemoteReadings> removeDublicatesFirst(List<RemoteReadings> lst)
        {
            // SQL Server rejects an RPC request that carries more than 2100 parameters:
            //   "The incoming tabular data stream (TDS) remote procedure call (RPC) protocol
            //    stream is incorrect. Too many parameters were provided in this RPC request.
            //    The maximum is 2100."
            // Every value of a Contains() list becomes one parameter, so BOTH key lists are
            // split into batches; a single query then carries at most 2 * 1000 values.
            const int maxValuesPerList = 1000;

            try
            {
                // The context is only needed for the lookups below; release it afterwards.
                using (DataClasses1DataContext db = new DataClasses1DataContext())
                {
                    // Materialised once because the meter-id batches are re-used for
                    // every date batch in the nested loop below.
                    var meterIdBatches = lst.Select(n => n.meterId)
                                            .Distinct()
                                            .Batch(maxValuesPerList)
                                            .Select(b => b.ToList())
                                            .ToList();

                    var dateBatches = lst.Select(n => n.mydate)
                                         .Distinct()
                                         .Batch(maxValuesPerList);

                    // Collects the duplicates found across all batch combinations.
                    var myfLst = new List<RemoteReadings>();

                    foreach (var dateBatch in dateBatches)
                    {
                        var batchDates = dateBatch.ToList();

                        foreach (var batchMeterIds in meterIdBatches)
                        {
                            // Key columns of the rows that may collide with this batch.
                            var probableMatches = (from ri in db.RemoteReadingss
                                                   where batchMeterIds.Contains(ri.meterId)
                                                      && batchDates.Contains(ri.mydate)
                                                   select new { ri.meterId, ri.mydate }).ToList();

                            // Join in memory on the composite key (meterId, mydate) to find
                            // the items of lst that already exist in the database.
                            var duplicates = from existingRi in probableMatches
                                             join newRi in lst
                                             on new { existingRi.meterId, existingRi.mydate }
                                             equals new { newRi.meterId, newRi.mydate }
                                             select newRi;

                            myfLst.AddRange(duplicates);
                        }
                    }

                    // Remove the duplicates found in myfLst from lst.
                    return lst.Except(myfLst).ToList();
                }
            }
            catch (Exception ex)
            {
                // Callers rely on null to signal failure; keep that contract but do not
                // lose the reason silently.
                System.Diagnostics.Trace.TraceError("removeDublicatesFirst failed: {0}", ex);
                return null;
            }
        }


// Found this extension Class to divide IEnumerable in batches.
// http://stackoverflow.com/a/13731854/288865
 /// <summary>
 /// Extension helpers for splitting sequences into fixed-size batches.
 /// </summary>
 public static class MyExtensions
    {
        /// <summary>
        /// Splits <paramref name="items"/> into consecutive batches of at most
        /// <paramref name="maxItems"/> elements, preserving the original order.
        /// </summary>
        /// <typeparam name="T">Element type of the sequence.</typeparam>
        /// <param name="items">The sequence to split.</param>
        /// <param name="maxItems">Maximum number of elements per batch; must be positive.</param>
        /// <returns>
        /// A lazily evaluated sequence of batches. Only the last batch may contain fewer
        /// than <paramref name="maxItems"/> elements; an empty input yields no batches.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="items"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxItems"/> is zero or negative.</exception>
        public static IEnumerable<IEnumerable<T>> Batch<T>(this IEnumerable<T> items,
                                                           int maxItems)
        {
            if (items == null)
            {
                throw new ArgumentNullException("items");
            }

            // Validate here, at call time: a zero size would otherwise only fail with a
            // DivideByZeroException once the result is enumerated.
            if (maxItems <= 0)
            {
                throw new ArgumentOutOfRangeException("maxItems", "Batch size must be greater than zero.");
            }

            // Integer division of the element index by the batch size gives the batch number.
            return items.Select((item, inx) => new { item, inx })
                        .GroupBy(x => x.inx / maxItems)
                        .Select(g => g.Select(x => x.item));
        }
    }
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!