I have some objects:
/// <summary>Simple data holder consisting of an id and a description.</summary>
class Foo
{
    /// <summary>Identifier of the object.</summary>
    public Guid id;

    /// <summary>Text describing the object.</summary>
    public string description;
}
// Build the sample list of Foo objects.
var list = new List<Foo>();
list.Add(new Foo() { id = Guid.Empty, description = "empty" });
Override Equals(object obj) and GetHashCode() methods:
/// <summary>
/// Data holder whose equality and hash code are based solely on <see cref="id"/>.
/// </summary>
class Foo
{
    /// <summary>
    /// Identifier used for equality. It is read-only so the hash code cannot change
    /// after construction.
    /// </summary>
    public readonly Guid id;

    /// <summary>Text describing the object; ignored by Equals and GetHashCode.</summary>
    public string description;

    /// <summary>Creates an instance whose id is <see cref="Guid.Empty"/>.</summary>
    public Foo()
    {
    }

    /// <summary>Creates an instance with the given id.</summary>
    /// <param name="id">Identifier to assign to the new instance.</param>
    public Foo(Guid id)
    {
        this.id = id;
    }

    /// <summary>
    /// Determines whether <paramref name="obj"/> is a Foo with the same id.
    /// </summary>
    /// <param name="obj">Object to compare with; may be null or of any type.</param>
    /// <returns>true when obj is a Foo whose id equals this id; otherwise false.</returns>
    public override bool Equals(object obj)
    {
        // "as" yields null for null or non-Foo arguments, so Equals answers false
        // instead of throwing.
        Foo other = obj as Foo;
        return other != null && other.id == id;
    }

    /// <summary>Returns the hash code of the id, consistent with Equals.</summary>
    public override int GetHashCode()
    {
        return id.GetHashCode();
    }
}
and then just call Distinct():
// With no comparer argument, Distinct() compares elements through the element type's
// own Equals/GetHashCode (the overrides on Foo shown above).
list = list.Distinct().ToList();
Using the Distinct() method is about 4x faster than using GroupBy() in my informal tests. For 1 million Foo objects, my test has Distinct() take about 0.89 seconds to produce a unique array from a non-unique array, whereas GroupBy() takes about 3.4 seconds.
My Distinct() call looks like,
// Hand the shared comparer to Distinct so elements are compared through FooComparer.
var comparer = FooComparer.Instance;
var unique = list.Distinct(comparer).ToArray();
and FooComparer
looks like,
/// <summary>
/// Equality comparer that treats two <see cref="Foo"/> instances as equal when their
/// ids are equal.
/// </summary>
class FooComparer : IEqualityComparer<Foo> {
    /// <summary>Shared instance; the comparer has no fields of its own.</summary>
    public static readonly FooComparer Instance = new FooComparer();

    /// <summary>Compares two Foo references by id.</summary>
    /// <param name="x">First object; may be null.</param>
    /// <param name="y">Second object; may be null.</param>
    /// <returns>
    /// true when both arguments are the same reference (including both null) or both
    /// are non-null with equal ids; otherwise false.
    /// </returns>
    public bool Equals(Foo x, Foo y) {
        if (ReferenceEquals(x, y)) {
            return true;
        }
        // Exactly one side is null here, so they cannot be equal.
        if (ReferenceEquals(x, null) || ReferenceEquals(y, null)) {
            return false;
        }
        return x.id.Equals(y.id);
    }

    /// <summary>Returns the hash code of the id, or 0 for a null argument.</summary>
    /// <param name="obj">Object to hash; may be null.</param>
    public int GetHashCode(Foo obj) {
        // Null hashes to 0 so that it stays consistent with Equals(null, null) == true.
        return ReferenceEquals(obj, null) ? 0 : obj.id.GetHashCode();
    }
}
and my GroupBy()
version looks like,
// Group the elements by id and keep the first element of every group.
var unique = list
    .GroupBy(l => l.id)
    .Select(g => g.First())
    .ToArray();
Create an IEqualityComparer<Foo> that returns true if the id fields are the same, and pass it to the Distinct() operator.
A very elegant and intention-revealing option is to define a new extension method on IEnumerable<T>.
So you have:
// Calls the key-selector Distinct extension defined below: elements whose ids are
// equal are treated as duplicates.
list = list.Distinct(foo => foo.id).ToList();
And ...
/// <summary>
/// Returns the distinct elements of <paramref name="list"/>, treating two elements as
/// duplicates when <paramref name="lookup"/> produces equal keys for them.
/// </summary>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <typeparam name="TKey">Value type of the key used for comparison.</typeparam>
/// <param name="list">Sequence to remove duplicates from.</param>
/// <param name="lookup">Function that extracts the comparison key from an element.</param>
/// <returns>A sequence with duplicates (by key) removed.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="list"/> or <paramref name="lookup"/> is null.
/// </exception>
public static IEnumerable<T> Distinct<T,TKey>(this IEnumerable<T> list, Func<T,TKey> lookup) where TKey : struct {
    // Validate up front so a bad argument is reported at the call site rather than
    // as a NullReferenceException once the result is enumerated.
    if (list == null) {
        throw new ArgumentNullException("list");
    }
    if (lookup == null) {
        throw new ArgumentNullException("lookup");
    }
    return list.Distinct(new StructEqualityComparer<T, TKey>(lookup));
}
/// <summary>
/// Equality comparer that compares elements by a value-type key extracted from each
/// element with a delegate.
/// </summary>
/// <typeparam name="T">Type of the elements being compared.</typeparam>
/// <typeparam name="TKey">Value type of the extracted key.</typeparam>
class StructEqualityComparer<T,TKey> : IEqualityComparer<T> where TKey : struct {
    // Key extractor supplied by the caller; never null after construction.
    readonly Func<T, TKey> lookup;

    /// <summary>Creates a comparer that uses <paramref name="lookup"/> to obtain keys.</summary>
    /// <param name="lookup">Function that extracts the comparison key from an element.</param>
    /// <exception cref="ArgumentNullException"><paramref name="lookup"/> is null.</exception>
    public StructEqualityComparer(Func<T, TKey> lookup) {
        if (lookup == null) {
            throw new ArgumentNullException("lookup");
        }
        this.lookup = lookup;
    }

    /// <summary>Compares two elements by their extracted keys.</summary>
    /// <returns>
    /// true when both elements are null, or both are non-null and their keys are equal;
    /// otherwise false.
    /// </returns>
    public bool Equals(T x, T y) {
        // Null elements are never passed to the key extractor: two nulls are equal,
        // a null and a non-null element are not.
        if (x == null || y == null) {
            return x == null && y == null;
        }
        // The default comparer uses IEquatable<TKey> when the key implements it, which
        // avoids boxing the key through Equals(object).
        return EqualityComparer<TKey>.Default.Equals(lookup(x), lookup(y));
    }

    /// <summary>Returns the hash code of the element's key, or 0 for a null element.</summary>
    public int GetHashCode(T obj) {
        if (obj == null) {
            return 0;
        }
        return lookup(obj).GetHashCode();
    }
}
A similar helper class can be built to compare objects (reference-type keys); it will need to do better null handling.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ConsoleApplication1
{
/// <summary>
/// Console demo that prints each distinct (id, description) pair found in a sample list.
/// </summary>
internal class Program
{
    /// <summary>
    /// Builds four sample Foo objects, groups them by id and description, writes every
    /// group key to the console, then waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        var list = new List<Foo>
        {
            new Foo() { id = Guid.Empty, description = "empty" },
            new Foo() { id = Guid.Empty, description = "empty" },
            new Foo() { id = Guid.NewGuid(), description = "notempty" },
            new Foo() { id = Guid.NewGuid(), description = "notempty2" },
        };

        // Anonymous-type keys compare member by member, so entries sharing both id
        // and description fall into the same group.
        var distinctKeys = list
            .GroupBy(item => new { item.id, item.description })
            .Select(grouping => grouping.Key);

        foreach (var key in distinctKeys)
        {
            Console.WriteLine("ID={0} Description={1}", key.id, key.description);
        }

        Console.ReadKey();
    }
}
/// <summary>Sample item carrying an id and a description.</summary>
internal class Foo
{
    /// <summary>Identifier of the item.</summary>
    public Guid id;

    /// <summary>Text describing the item.</summary>
    public string description;
}
}