在我提出任何建议之前,我需要知道这个问题的答案:
name phone email
John 555-00-00 john@example.com
John 555-00-01 john@example.com
John 555-00-01 john-other@example.com
对于这些数据,您期望 COUNT(*) 返回什么结果?
更新:
如果您只想知道哪些记录存在重复项,请使用以下查询:
-- Lists every sample contact that has at least one duplicate: a different row
-- with the same name that also shares either the phone or the email.
WITH q AS (
    -- Inline sample rows standing in for the real contacts table.
    SELECT 1 AS id, 'John' AS name, '555-00-00' AS phone, 'john@example.com' AS email
    UNION ALL
    SELECT 2, 'John', '555-00-01', 'john@example.com'
    UNION ALL
    SELECT 3, 'John', '555-00-01', 'john-other@example.com'
    UNION ALL
    SELECT 4, 'James', '555-00-00', 'james@example.com'
    UNION ALL
    SELECT 5, 'James', '555-00-01', 'james-other@example.com'
)
SELECT
    cur.id,
    cur.name,
    cur.phone,
    cur.email
FROM q AS cur
WHERE EXISTS (
    -- Only existence matters here, so the selected value is irrelevant.
    SELECT 1
    FROM q AS other
    WHERE other.name = cur.name
      AND other.id <> cur.id
      AND (other.email = cur.email OR other.phone = cur.phone)
);
该查询比下面的查询更高效,但不会告诉您重复链从哪里开始。
此查询选择所有条目,并附加一个特殊字段 chainid,
用于指示重复链的开始位置。
-- Returns every sample contact together with:
--   chainid: the id of the row that starts the duplicate chain it belongs to;
--   d:       the length of the shortest link path from the chain start to the
--            row, counting the start itself as 1.
-- Two rows are linked when they share the name and either the phone or the email.
--
-- Written without the RECURSIVE keyword, as in the original; engines that
-- require WITH RECURSIVE for a self-referencing CTE need it added.
--
-- Known limitation: chains are followed in ascending id order only, so when
-- two earlier rows are not linked to each other but both link to the same
-- later row, that later row is reported in both chains.
WITH q AS (
    -- Inline sample rows standing in for the real contacts table.
    SELECT 1 AS id, 'John' AS name, '555-00-00' AS phone, 'john@example.com' AS email
    UNION ALL
    SELECT 2, 'John', '555-00-01', 'john@example.com'
    UNION ALL
    SELECT 3, 'John', '555-00-01', 'john-other@example.com'
    UNION ALL
    SELECT 4, 'James', '555-00-00', 'james@example.com'
    UNION ALL
    SELECT 5, 'James', '555-00-01', 'james-other@example.com'
),
dup AS (
    -- Anchor: every row starts a candidate chain of its own.
    SELECT id AS chainid, id, name, phone, email, 1 AS d
    FROM q
    UNION ALL
    -- Step: extend a chain to any row with a higher id that is linked to the
    -- row just reached; the id > id condition guarantees termination.
    SELECT dup.chainid, nxt.id, nxt.name, nxt.phone, nxt.email, dup.d + 1
    FROM dup
    INNER JOIN q AS nxt
        ON nxt.name = dup.name
       AND (nxt.phone = dup.phone OR nxt.email = dup.email)
       AND nxt.id > dup.id
),
chains AS (
    -- Keep only chains whose starting row is not already a member of a chain
    -- with a lower chainid. NOT EXISTS is used instead of NOT IN so that a
    -- NULL id could never silently empty the result.
    -- A row reachable through several link paths appears in dup once per path;
    -- GROUP BY / MIN(d) collapses those to a single row per (chainid, id).
    SELECT
        cand.chainid,
        cand.id,
        cand.name,
        cand.phone,
        cand.email,
        MIN(cand.d) AS d
    FROM dup AS cand
    WHERE NOT EXISTS (
        SELECT 1
        FROM dup AS earlier
        WHERE earlier.chainid < cand.chainid
          AND earlier.id = cand.chainid
    )
    GROUP BY
        cand.chainid,
        cand.id,
        cand.name,
        cand.phone,
        cand.email
)
SELECT
    chainid,
    id,
    name,
    phone,
    email,
    d
FROM chains
-- id added as a tie-breaker so the row order within a chain is deterministic.
ORDER BY
    chainid,
    id;