有人知道为什么这条语句在处理 300,000 行数据时耗时这么长吗?基本上,此查询用于查找重复记录。
-- Find candidate duplicate pairs in csv_temp: a row with status = 2 (alias a)
-- paired with a row with status = 1 (alias b) that has the same firstname and
-- lastname and also agrees on address, or on zip, or on both city and state.
-- Rows whose flag equals 1 are excluded on both sides.
--
-- The three location tests used to be OR-ed together inside one ON clause.
-- An OR across different columns often keeps the optimizer from using an
-- index seek or a hash/merge join for that part of the match, so the join
-- can degrade towards comparing every name-matched pair. Each branch below
-- is a pure equality join instead. UNION (not UNION ALL) removes pairs that
-- are matched by more than one branch, which is what SELECT DISTINCT did.
--
-- NOTE(review): composite indexes such as (firstname, lastname, address),
-- (firstname, lastname, zip) and (firstname, lastname, city, state) should
-- let each branch use an index; confirm with EXPLAIN on the target engine.
-- NOTE(review): "flag <> 1" also drops rows whose flag is NULL, because the
-- comparison is then UNKNOWN rather than true; confirm that is intended.
-- NOTE(review): a.Id <> b.Id is already implied by the differing status
-- values if Id is unique; kept because uniqueness is not visible here.

-- Branch 1: same name and same address.
SELECT
    a.Id,
    b.Id AS sid
FROM
    csv_temp a
INNER JOIN
    csv_temp b
    ON a.firstname = b.firstname
    AND a.lastname = b.lastname
    AND a.address = b.address
WHERE
    a.Id <> b.Id
    AND a.status = 2
    AND b.status = 1
    AND a.flag <> 1
    AND b.flag <> 1

UNION

-- Branch 2: same name and same zip.
SELECT
    a.Id,
    b.Id AS sid
FROM
    csv_temp a
INNER JOIN
    csv_temp b
    ON a.firstname = b.firstname
    AND a.lastname = b.lastname
    AND a.zip = b.zip
WHERE
    a.Id <> b.Id
    AND a.status = 2
    AND b.status = 1
    AND a.flag <> 1
    AND b.flag <> 1

UNION

-- Branch 3: same name and same city + state.
SELECT
    a.Id,
    b.Id AS sid
FROM
    csv_temp a
INNER JOIN
    csv_temp b
    ON a.firstname = b.firstname
    AND a.lastname = b.lastname
    AND a.city = b.city
    AND a.state = b.state
WHERE
    a.Id <> b.Id
    AND a.status = 2
    AND b.status = 1
    AND a.flag <> 1
    AND b.flag <> 1