我认为其中大多数都存在问题,包括接受的答案,因为它们不能很好地与 Linq over IQueryable 一起使用,因为执行了太多的服务器往返和太多的数据返回,或者执行了太多的客户端执行。
对于 IEnumerable,我不喜欢 Sehe 的答案或类似的答案,因为它们占用过多内存(一个简单的测试:两个列表各含 10000000 个元素,就让 LINQPad 在我 32GB 内存的机器上耗尽了内存)。
此外,其他大多数答案实际上并没有实现正确的完全外连接,因为它们使用的是 Union 加右连接,而不是 Concat 加右反半连接(Right Anti-Semi-Join);Union 不仅会从结果中去掉重复的内连接行,还会去掉左侧或右侧数据中原本就合法存在的重复行。
所以这是我的扩展,它们处理所有这些问题,生成 SQL 以及直接在 LINQ to SQL 中实现连接,在服务器上执行,并且比 Enumerables 上的其他扩展更快且内存更少:
/// <summary>
/// Outer-join extension methods for in-memory sequences (<see cref="IEnumerable{T}"/>) and
/// for provider-translated queries (<see cref="IQueryable{T}"/>).
/// </summary>
public static class Ext {
    // ---------------------------------------------------------------------
    // IEnumerable<T> (LINQ to Objects) versions
    // ---------------------------------------------------------------------

    /// <summary>
    /// Left outer join: every left element is paired with each matching right element,
    /// or with <c>default(TRight)</c> when there is no match.
    /// </summary>
    /// <param name="leftItems">The preserved (outer) side.</param>
    /// <param name="rightItems">The optional (inner) side.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left element.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right element.</param>
    /// <param name="resultSelector">Builds a result from a left element and a right element (or its default).</param>
    /// <returns>A deferred sequence of results in left order.</returns>
    public static IEnumerable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        return from left in leftItems
               join right in rightItems on leftKeySelector(left) equals rightKeySelector(right) into matches
               from right in matches.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    /// <summary>
    /// Right outer join: every right element is paired with each matching left element,
    /// or with <c>default(TLeft)</c> when there is no match.
    /// </summary>
    /// <param name="leftItems">The optional (inner) side.</param>
    /// <param name="rightItems">The preserved (outer) side.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left element.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right element.</param>
    /// <param name="resultSelector">Builds a result from a left element (or its default) and a right element.</param>
    /// <returns>A deferred sequence of results in right order.</returns>
    public static IEnumerable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        return from right in rightItems
               join left in leftItems on rightKeySelector(right) equals leftKeySelector(left) into matches
               from left in matches.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    /// <summary>
    /// Full outer join built as <c>left outer join UNION right outer join</c>.
    /// Because <c>Union</c> removes duplicates, equal result rows are collapsed into one,
    /// including duplicates that were legitimately present in the inputs.
    /// </summary>
    /// <returns>A deferred, de-duplicated sequence of results.</returns>
    public static IEnumerable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        var leftPart = leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector);
        var rightPart = leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector);
        return leftPart.Union(rightPart);
    }

    /// <summary>
    /// Right anti-semi-join: yields <c>resultSelector(default(TLeft), right)</c> for every right
    /// element that has no matching left element.
    /// </summary>
    /// <remarks>
    /// A null key never matches anything, mirroring how <c>Enumerable.Join</c>/<c>GroupJoin</c>
    /// treat null keys; right elements with a null key are therefore always returned.
    /// The left key set is built when the result is enumerated, not when this method is called.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="leftItems"/> or <paramref name="rightItems"/> is null.</exception>
    public static IEnumerable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        // Validate eagerly so a null source still fails at the call site,
        // while the actual work stays deferred like every other operator here.
        if (leftItems == null) {
            throw new ArgumentNullException(nameof(leftItems));
        }
        if (rightItems == null) {
            throw new ArgumentNullException(nameof(rightItems));
        }
        return RightAntiSemiJoinIterator(leftItems, rightItems, leftKeySelector, rightKeySelector, resultSelector);
    }

    // Deferred body of the IEnumerable RightAntiSemiJoin.
    private static IEnumerable<TResult> RightAntiSemiJoinIterator<TLeft, TRight, TKey, TResult>(
        IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {
        var leftKeys = new HashSet<TKey>();
        foreach (var left in leftItems) {
            var leftKey = leftKeySelector(left);
            // Null keys are not joinable, so they must not suppress right rows.
            if (leftKey != null) {
                leftKeys.Add(leftKey);
            }
        }

        foreach (var right in rightItems) {
            var rightKey = rightKeySelector(right);
            if (rightKey == null || !leftKeys.Contains(rightKey)) {
                yield return resultSelector(default(TLeft), right);
            }
        }
    }

    /// <summary>
    /// Full outer join built as <c>left outer join CONCAT right anti-semi-join</c>, so duplicate
    /// rows present in the inputs are preserved.
    /// </summary>
    /// <remarks><paramref name="leftItems"/> is enumerated twice when the result is enumerated.</remarks>
    /// <returns>A deferred sequence: all left-join rows in left order, then unmatched right rows in right order.</returns>
    public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) where TLeft : class {
        var leftPart = leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector);
        var unmatchedRight = leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector);
        return leftPart.Concat(unmatchedRight);
    }

    // ---------------------------------------------------------------------
    // IQueryable<T> (provider-translated) versions
    // ---------------------------------------------------------------------

    // Replaces the two parameters of a result selector with arbitrary expressions.
    private sealed class ParameterSubstituter : ExpressionVisitor {
        private readonly ParameterExpression _leftParm;
        private readonly Expression _leftArg;
        private readonly ParameterExpression _rightParm;
        private readonly Expression _rightArg;

        public ParameterSubstituter(ParameterExpression leftParm, Expression leftArg, ParameterExpression rightParm, Expression rightArg) {
            _leftParm = leftParm;
            _leftArg = leftArg;
            _rightParm = rightParm;
            _rightArg = rightArg;
        }

        protected override Expression VisitParameter(ParameterExpression node) {
            if (node == _leftParm) {
                return _leftArg;
            }
            if (node == _rightParm) {
                return _rightArg;
            }
            return base.VisitParameter(node);
        }
    }

    // Returns the body of resultSelector with its parameters replaced by leftArg/rightArg.
    // Inlining the body (instead of wrapping the lambda in Expression.Invoke) keeps the tree
    // free of Invoke nodes, which some query providers cannot translate.
    private static Expression InlineResultSelector<TLeft, TRight, TResult>(
        Expression<Func<TLeft, TRight, TResult>> resultSelector,
        Expression leftArg,
        Expression rightArg) {
        var substituter = new ParameterSubstituter(resultSelector.Parameters[0], leftArg, resultSelector.Parameters[1], rightArg);
        var inlined = substituter.Visit(resultSelector.Body);
        // A lambda body may be typed as a subtype of TResult; normalise so the rebuilt
        // lambda has exactly the delegate type the Cast* helpers expect.
        return inlined.Type == typeof(TResult) ? inlined : Expression.Convert(inlined, typeof(TResult));
    }

    // Casts an untyped lambda to Expression<Func<TP, TC, TResult>>. The unused arguments exist
    // only so the compiler can infer TP when it is an anonymous type that cannot be named.
    private static Expression<Func<TP, TC, TResult>> CastSMBody<TP, TC, TResult>(LambdaExpression ex, TP unusedP, TC unusedC, TResult unusedRes) => (Expression<Func<TP, TC, TResult>>)ex;

    /// <summary>
    /// Left outer join for queryables, expressed as <c>GroupJoin</c> + <c>SelectMany</c> over
    /// <c>DefaultIfEmpty</c> so the query provider can translate it.
    /// </summary>
    /// <param name="leftItems">The preserved (outer) side.</param>
    /// <param name="rightItems">The optional (inner) side.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left element.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right element.</param>
    /// <param name="resultSelector">Builds a result from a left element and a right element (or its default).</param>
    public static IQueryable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        // Sample instance of the anonymous type produced by the GroupJoin below; used to
        // obtain its runtime Type and to drive generic inference in CastSMBody.
        var sampleAnonLR = new { left = default(TLeft), rightg = default(IEnumerable<TRight>) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TRight), "c");
        var argLeft = Expression.PropertyOrField(parmP, "left");

        // (p, c) => <resultSelector body with left := p.left, right := c>
        var body = InlineResultSelector(resultSelector, argLeft, parmC);
        var newleftrs = CastSMBody(Expression.Lambda(body, parmP, parmC), sampleAnonLR, default(TRight), default(TResult));

        return leftItems
            .GroupJoin(rightItems, leftKeySelector, rightKeySelector, (left, rightg) => new { left, rightg })
            .SelectMany(r => r.rightg.DefaultIfEmpty(), newleftrs);
    }

    /// <summary>
    /// Right outer join for queryables, expressed as <c>GroupJoin</c> + <c>SelectMany</c> over
    /// <c>DefaultIfEmpty</c> with the right side as the outer sequence.
    /// </summary>
    /// <param name="leftItems">The optional (inner) side.</param>
    /// <param name="rightItems">The preserved (outer) side.</param>
    /// <param name="leftKeySelector">Extracts the join key from a left element.</param>
    /// <param name="rightKeySelector">Extracts the join key from a right element.</param>
    /// <param name="resultSelector">Builds a result from a left element (or its default) and a right element.</param>
    public static IQueryable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        var sampleAnonLR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TLeft), "c");
        var argRight = Expression.PropertyOrField(parmP, "right");

        // (p, c) => <resultSelector body with left := c, right := p.right>
        var body = InlineResultSelector(resultSelector, parmC, argRight);
        var newrightrs = CastSMBody(Expression.Lambda(body, parmP, parmC), sampleAnonLR, default(TLeft), default(TResult));

        return rightItems
            .GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right })
            .SelectMany(l => l.leftg.DefaultIfEmpty(), newrightrs);
    }

    /// <summary>
    /// Full outer join for queryables built as <c>left outer join UNION right outer join</c>.
    /// <c>Union</c> removes duplicate result rows.
    /// </summary>
    public static IQueryable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        var leftPart = leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector);
        var rightPart = leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector);
        return leftPart.Union(rightPart);
    }

    // Casts an untyped lambda to Expression<Func<TP, TResult>>; the unused arguments exist
    // only for generic inference of an anonymous TP.
    private static Expression<Func<TP, TResult>> CastSBody<TP, TResult>(LambdaExpression ex, TP unusedP, TResult unusedRes) => (Expression<Func<TP, TResult>>)ex;

    /// <summary>
    /// Right anti-semi-join for queryables: right elements whose group of matching left
    /// elements is empty, projected through <paramref name="resultSelector"/> with a
    /// constant <c>default(TLeft)</c> as the left argument.
    /// </summary>
    public static IQueryable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        var sampleAnonLgR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmLgR = Expression.Parameter(sampleAnonLgR.GetType(), "lgr");
        var argLeft = Expression.Constant(default(TLeft), typeof(TLeft));
        var argRight = Expression.PropertyOrField(parmLgR, "right");

        // lgr => <resultSelector body with left := default(TLeft), right := lgr.right>
        var body = InlineResultSelector(resultSelector, argLeft, argRight);
        var newrightrs = CastSBody(Expression.Lambda(body, parmLgR), sampleAnonLgR, default(TResult));

        return rightItems
            .GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right })
            .Where(lgr => !lgr.leftg.Any())
            .Select(newrightrs);
    }

    /// <summary>
    /// Full outer join for queryables built as <c>left outer join CONCAT right anti-semi-join</c>,
    /// so duplicate rows present in the inputs are preserved.
    /// </summary>
    public static IQueryable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {
        var leftPart = leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector);
        var unmatchedRight = leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector);
        return leftPart.Concat(unmatchedRight);
    }
}
是否使用右反半连接(Right Anti-Semi-Join),在 LINQ to Objects 或源代码层面基本没有差别,但在服务器(SQL)端会影响最终生成的查询:它去掉了一个不必要的 JOIN。
为了把一个 Expression<Func<>> 合并进 lambda 而手工编写 Expression 的那部分代码,可以用 LinqKit 加以改进;不过如果语言/编译器能为此提供一些帮助就更好了。为了完整起见,这里包含了 FullOuterJoinDistinct 和 RightOuterJoin 函数,但我还没有重新实现 FullOuterGroupJoin。
针对键可排序的情况,我还为 IEnumerable 编写了另一个版本的全外连接,它比"左外连接 + 右反半连接"的组合快约 50%,至少在小型集合上是这样。它先对两个集合排序,然后各只遍历一次。
我还添加了另一个答案,给出可与 EF 一起使用的版本:其中把 Invoke 替换成了自定义的表达式展开。