I have a list of people's ID and their first name, and a list of people's ID and their surname. Some people don't have a first name and some don't have a surname; I'd like to do a full outer join on the two lists.
I think there are problems with most of these, including the accepted answer, because they don't work well with Linq over IQueryable, either because they do too many server round trips and return too much data, or because they do too much client-side execution.
For IEnumerable I don't like Sehe's answer or similar because it has excessive memory use (a simple 10000000 two list test ran Linqpad out of memory on my 32GB machine).
Also, most of the others don't actually implement a proper Full Outer Join because they are using a Union with a Right Join instead of a Concat with a Right Anti Semi Join, which not only eliminates the duplicate inner join rows from the result, but also any proper duplicates that existed originally in the left or right data.
So here are my extensions that handle all of these issues: they generate SQL and implement the join in LINQ to SQL directly, executing on the server, and they are faster and use less memory than the others on Enumerables:
/// <summary>
/// Left/right/full outer join and right anti-semi-join extension methods.
/// Each operation is provided twice: once over <see cref="IEnumerable{T}"/> (evaluated in memory)
/// and once over <see cref="IQueryable{T}"/> (composed as expression trees for the query provider).
/// </summary>
public static class Ext {
    /// <summary>
    /// Left outer join: every left item appears at least once; a left item with no matching
    /// right item is paired with <c>default(TRight)</c>.
    /// </summary>
    /// <param name="leftItems">The preserved (outer) side of the join.</param>
    /// <param name="rightItems">The optional (inner) side of the join.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Projects a (left, right) pair to a result.</param>
    /// <returns>A deferred sequence of projected pairs.</returns>
    public static IEnumerable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        return from left in leftItems
               join right in rightItems on leftKeySelector(left) equals rightKeySelector(right) into temp
               from right in temp.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    /// <summary>
    /// Right outer join: every right item appears at least once; a right item with no matching
    /// left item is paired with <c>default(TLeft)</c>.
    /// </summary>
    /// <param name="leftItems">The optional side of the join.</param>
    /// <param name="rightItems">The preserved side of the join.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Projects a (left, right) pair to a result.</param>
    /// <returns>A deferred sequence of projected pairs.</returns>
    public static IEnumerable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        return from right in rightItems
               join left in leftItems on rightKeySelector(right) equals leftKeySelector(left) into temp
               from left in temp.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    /// <summary>
    /// Full outer join built as the <c>Union</c> of the left and right outer joins.
    /// Because <c>Union</c> is a set operation, results that compare equal (default equality
    /// of <typeparamref name="TResult"/>) are collapsed, including genuine duplicates in the inputs.
    /// </summary>
    /// <param name="leftItems">The left side of the join.</param>
    /// <param name="rightItems">The right side of the join.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Projects a (left, right) pair to a result.</param>
    /// <returns>A deferred sequence of distinct projected pairs.</returns>
    public static IEnumerable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector)
                        .Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    /// <summary>
    /// Right anti-semi-join: returns only the right items whose key matches no left item,
    /// each paired with <c>default(TLeft)</c>.
    /// </summary>
    /// <remarks>
    /// The set of left keys is built when this method is called; filtering of
    /// <paramref name="rightItems"/> is deferred until the result is enumerated.
    /// </remarks>
    /// <param name="leftItems">Items whose keys exclude right items.</param>
    /// <param name="rightItems">Candidate items to return.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Projects a (default left, right) pair to a result.</param>
    /// <returns>Projected results for the unmatched right items.</returns>
    public static IEnumerable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        var leftKeys = new HashSet<TKey>(from l in leftItems select leftKeySelector(l));
        return rightItems
            .Where(r => {
                var key = rightKeySelector(r);
                // LINQ to Objects joins never match null keys, so a right item with a null key
                // is always unmatched. Without this check a null left key in the hash set would
                // wrongly suppress it and the row would vanish from FullOuterJoin.
                return key == null || !leftKeys.Contains(key);
            })
            .Select(r => resultSelector(default(TLeft), r));
    }

    /// <summary>
    /// Full outer join built as the left outer join concatenated with the right anti-semi-join,
    /// so duplicates present in the inputs are preserved.
    /// </summary>
    /// <remarks>
    /// <typeparamref name="TLeft"/> is constrained to a reference type, so right-only rows
    /// receive a <c>null</c> left item.
    /// </remarks>
    /// <param name="leftItems">The left side of the join.</param>
    /// <param name="rightItems">The right side of the join.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left item.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right item.</param>
    /// <param name="resultSelector">Projects a (left, right) pair to a result.</param>
    /// <returns>Left-outer-join rows followed by unmatched right rows.</returns>
    public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) where TLeft : class {
        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector)
                        .Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    // Casts an untyped lambda to a two-parameter typed expression. The unused arguments exist only
    // so type inference can name the anonymous type TP, which cannot be written in source.
    private static Expression<Func<TP, TC, TResult>> CastSMBody<TP, TC, TResult>(LambdaExpression ex, TP unusedP, TC unusedC, TResult unusedRes) => (Expression<Func<TP, TC, TResult>>)ex;

    /// <summary>
    /// Left outer join over <see cref="IQueryable{T}"/>, composed as <c>GroupJoin</c> followed by
    /// <c>SelectMany</c> over <c>DefaultIfEmpty()</c>.
    /// </summary>
    /// <remarks>
    /// <paramref name="resultSelector"/> is embedded with <c>Expression.Invoke</c>.
    /// NOTE(review): providers that cannot translate invocation expressions may reject the query; confirm for your provider.
    /// </remarks>
    /// <param name="leftItems">The preserved side of the join.</param>
    /// <param name="rightItems">The optional side of the join.</param>
    /// <param name="leftKeySelector">Key expression for left items.</param>
    /// <param name="rightKeySelector">Key expression for right items.</param>
    /// <param name="resultSelector">Projection of a (left, right) pair.</param>
    /// <returns>A composed query; nothing is executed here.</returns>
    public static IQueryable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        // Sample instance whose anonymous type matches the GroupJoin projection below.
        var sampleAnonLR = new { left = default(TLeft), rightg = default(IEnumerable<TRight>) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TRight), "c");
        var argLeft = Expression.PropertyOrField(parmP, "left");
        // (p, c) => resultSelector(p.left, c)
        var newleftrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, parmC), parmP, parmC), sampleAnonLR, default(TRight), default(TResult));

        return leftItems
            .GroupJoin(rightItems, leftKeySelector, rightKeySelector, (left, rightg) => new { left, rightg })
            .SelectMany(r => r.rightg.DefaultIfEmpty(), newleftrs);
    }

    /// <summary>
    /// Right outer join over <see cref="IQueryable{T}"/>, composed as <c>GroupJoin</c> (right as outer)
    /// followed by <c>SelectMany</c> over <c>DefaultIfEmpty()</c>.
    /// </summary>
    /// <param name="leftItems">The optional side of the join.</param>
    /// <param name="rightItems">The preserved side of the join.</param>
    /// <param name="leftKeySelector">Key expression for left items.</param>
    /// <param name="rightKeySelector">Key expression for right items.</param>
    /// <param name="resultSelector">Projection of a (left, right) pair.</param>
    /// <returns>A composed query; nothing is executed here.</returns>
    public static IQueryable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        // Sample instance whose anonymous type matches the GroupJoin projection below.
        var sampleAnonLR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TLeft), "c");
        var argRight = Expression.PropertyOrField(parmP, "right");
        // (p, c) => resultSelector(c, p.right)
        var newrightrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, parmC, argRight), parmP, parmC), sampleAnonLR, default(TLeft), default(TResult));

        return rightItems
            .GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right })
            .SelectMany(l => l.leftg.DefaultIfEmpty(), newrightrs);
    }

    /// <summary>
    /// Full outer join over <see cref="IQueryable{T}"/> built as the <c>Union</c> of the left and
    /// right outer joins; being a set operation, <c>Union</c> removes duplicate results.
    /// </summary>
    /// <param name="leftItems">The left side of the join.</param>
    /// <param name="rightItems">The right side of the join.</param>
    /// <param name="leftKeySelector">Key expression for left items.</param>
    /// <param name="rightKeySelector">Key expression for right items.</param>
    /// <param name="resultSelector">Projection of a (left, right) pair.</param>
    /// <returns>A composed query; nothing is executed here.</returns>
    public static IQueryable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector)
                        .Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    // Casts an untyped lambda to a one-parameter typed expression. The unused arguments exist only
    // so type inference can name the anonymous type TP, which cannot be written in source.
    private static Expression<Func<TP, TResult>> CastSBody<TP, TResult>(LambdaExpression ex, TP unusedP, TResult unusedRes) => (Expression<Func<TP, TResult>>)ex;

    /// <summary>
    /// Right anti-semi-join over <see cref="IQueryable{T}"/>: right items whose left group is empty,
    /// each projected with a constant <c>default(TLeft)</c> as the left argument.
    /// </summary>
    /// <param name="leftItems">Items whose keys exclude right items.</param>
    /// <param name="rightItems">Candidate items to return.</param>
    /// <param name="leftKeySelector">Key expression for left items.</param>
    /// <param name="rightKeySelector">Key expression for right items.</param>
    /// <param name="resultSelector">Projection of a (default left, right) pair.</param>
    /// <returns>A composed query; nothing is executed here.</returns>
    public static IQueryable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        // Sample instance whose anonymous type matches the GroupJoin projection below.
        var sampleAnonLgR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmLgR = Expression.Parameter(sampleAnonLgR.GetType(), "lgr");
        var argLeft = Expression.Constant(default(TLeft), typeof(TLeft));
        var argRight = Expression.PropertyOrField(parmLgR, "right");
        // lgr => resultSelector(default(TLeft), lgr.right)
        var newrightrs = CastSBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, argRight), parmLgR), sampleAnonLgR, default(TResult));

        return rightItems
            .GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right })
            .Where(lgr => !lgr.leftg.Any())
            .Select(newrightrs);
    }

    /// <summary>
    /// Full outer join over <see cref="IQueryable{T}"/> built as the left outer join concatenated
    /// with the right anti-semi-join, so duplicates present in the inputs are preserved.
    /// </summary>
    /// <param name="leftItems">The left side of the join.</param>
    /// <param name="rightItems">The right side of the join.</param>
    /// <param name="leftKeySelector">Key expression for left items.</param>
    /// <param name="rightKeySelector">Key expression for right items.</param>
    /// <param name="resultSelector">Projection of a (left, right) pair.</param>
    /// <returns>A composed query; nothing is executed here.</returns>
    public static IQueryable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector)
                        .Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }
}
The difference between a Right Outer Join and a `Right Anti-Semi-Join` is mostly moot with Linq to Objects or in the source, but it makes a difference on the server (SQL) side in the final answer, removing an unnecessary `JOIN`.
The hand coding of `Expression` to handle merging an `Expression` into a lambda could be improved with LinqKit, but it would be nice if the language/compiler had added some help for that. The `FullOuterJoinDistinct` and `RightOuterJoin` functions are included for completeness, but I did not re-implement `FullOuterGroupJoin` yet.
I wrote another version of a full outer join for `IEnumerable` for cases where the key is orderable, which is about 50% faster than combining the left outer join with the right anti semi join, at least on small collections. After sorting, it goes through each collection just once.
I also added another answer for a version that works with EF by replacing the `Invoke` with a custom expansion.