4

我创建了以下视图“user_details_merged”:

-- View body for user_details_merged.
-- Each user_details row ("base") gets its NULL columns filled in from other
-- user_details rows that share one of its non-NULL identifying values.
-- When several donor rows qualify, one output row is produced per combination;
-- DISTINCT then removes exact duplicates.
SELECT DISTINCT
    COALESCE(base.user_name,  user_src.user_name)  AS user_name,
    COALESCE(base.email,      mail_src.email)      AS email,
    COALESCE(base.first_name, name_src.first_name) AS first_name,
    COALESCE(base.last_name,  name_src.last_name)  AS last_name
FROM user_details AS base

-- Donor of user_name: consulted only when base has none;
-- matched by email, or by the full (first_name, last_name) pair.
LEFT JOIN user_details AS user_src
    ON  base.user_name IS NULL
    AND (
            (base.email IS NOT NULL AND user_src.email = base.email)
         OR (    base.first_name IS NOT NULL
             AND base.last_name  IS NOT NULL
             AND user_src.first_name = base.first_name
             AND user_src.last_name  = base.last_name)
        )

-- Donor of email: consulted only when base has none;
-- matched by user_name, or by the full (first_name, last_name) pair.
LEFT JOIN user_details AS mail_src
    ON  base.email IS NULL
    AND (
            (base.user_name IS NOT NULL AND mail_src.user_name = base.user_name)
         OR (    base.first_name IS NOT NULL
             AND base.last_name  IS NOT NULL
             AND mail_src.first_name = base.first_name
             AND mail_src.last_name  = base.last_name)
        )

-- Donor of first_name/last_name: consulted only when base has neither;
-- matched by email or by user_name.
LEFT JOIN user_details AS name_src
    ON  base.first_name IS NULL
    AND base.last_name  IS NULL
    AND (
            (base.email     IS NOT NULL AND name_src.email     = base.email)
         OR (base.user_name IS NOT NULL AND name_src.user_name = base.user_name)
        )

-- Ascending is the default direction, so it is not spelled out.
ORDER BY user_name, email, first_name, last_name

这个视图会合并我的数据行。下面第一张表是 user_details 中的原始数据,第二张表是视图 user_details_merged 返回的结果:

user_name | email | first_name | last_name
a             b       NULL         NULL
NULL          b        c            d
a            NULL      e            f
NULL          x        y            z

user_name | email | first_name | last_name
a             b       NULL         NULL
NULL          b        c            d
a            NULL      e            f
NULL          x        y            z

a             b        c            d
a             b        e            f

我想要的是:

user_name | email | first_name | last_name
NULL          x        y            z
a             b        c            d
a             b        e            f

也就是说:如果存在数据相同但信息更完整的行,就不应再输出那些含有 NULL 的行;但如果没有信息更完整的其他行,则仍应保留该行(例如上面的 NULL x y z)。

这里的第二个视图正是我需要的:

-- Second view: returns the distinct rows "a" of user_details_merged that pass
-- the three NOT EXISTS checks below and that join to at least one other row "b".
-- Inside each NOT EXISTS the alias b is re-declared, so there it refers to the
-- subquery's own scan of user_details_merged, not to the joined b.
SELECT DISTINCT a.user_name,a.email,a.first_name,a.last_name FROM
user_details_merged a
LEFT JOIN user_details_merged b
ON
(
    -- Check 1: a already has a user_name, or no row with a non-NULL user_name
    -- agrees with a on email/first_name/last_name. ISNULL(a.x, b.x) compares b.x
    -- with itself when a.x is NULL, so a NULL in a only requires b.x to be non-NULL.
    (
    a.user_name IS NOT NULL OR 
    NOT EXISTS (SELECT user_name FROM user_details_merged b WHERE b.user_name IS NOT NULL AND 
        b.email=ISNULL(a.email,b.email) AND 
        b.first_name=isnull(a.first_name,b.first_name) AND
        b.last_name=isnull(a.last_name,b.last_name))
    )

    AND

    -- Check 2: same idea for email, matching on user_name/first_name/last_name.
    (
    a.email IS NOT NULL OR 
    NOT EXISTS (SELECT email FROM user_details_merged b WHERE b.email IS NOT NULL AND 
        b.user_name=ISNULL(a.user_name,b.user_name) AND 
        b.first_name=isnull(a.first_name,b.first_name) AND
        b.last_name=isnull(a.last_name,b.last_name))
    )

    AND

    -- Check 3: a has both first_name and last_name, or no row with a non-NULL
    -- email agrees with a on user_name/email.
    -- NOTE(review): unlike checks 1 and 2, this subquery tests b.email rather than
    -- b.first_name/b.last_name, so a row with NULL names can match itself here
    -- -- possibly a copy/paste slip; confirm the intended predicate.
    (
    (a.first_name IS NOT NULL AND a.last_name IS NOT NULL) OR 
    NOT EXISTS (SELECT email FROM user_details_merged b WHERE b.email IS NOT NULL AND 
        b.user_name=ISNULL(a.user_name,b.user_name) AND 
        b.email=ISNULL(a.email,b.email))
        -- AND b.first_name=isnull(a.first_name,b.first_name) AND b.last_name=isnull(a.last_name,b.last_name))
    )

    -- Do not pair a with a joined row b that equals it in all four columns.
    AND NOT (a.first_name = b.first_name AND a.last_name = b.last_name AND a.email = b.email AND a.user_name = b.user_name)

)

-- Drops rows of a for which the LEFT JOIN supplied no b values (all four b columns NULL).
WHERE coalesce(b.user_name,b.email,b.first_name,b.last_name) IS NOT NULL

主要问题在于,数据来源 user_details 本身是一个视图,由许多不同表的 UNION 组成。有些表只包含用户名和电子邮件,有些只包含电子邮件和名字/姓氏,等等。这就是为什么没有唯一键,而且由于使用了 UNION,我也无法为该视图建立索引。结果是最后一个视图运行一小时都无法执行完。我目前的解决方法是使用一个存储过程,把 user_details_merged 视图的数据存入临时表,并让上面的第二个视图改为读取该表中的数据。这样处理 8000 行的执行时间可以缩短到 7 秒。

还有其他建议吗?

非常感谢 ;)

4

3 回答 3

1

哎哟! 这是一个混乱的数据模型。最好的解决方案是修复数据模型,从根本上避免需要如此复杂的查询。不过,应用程序的依赖关系往往会让这种修复变得更加困难,所以我假设现有的数据模型只能被接受。

  • 我使用 4 行示例来提出替代解决方案。
  • 然后我添加了几行只有名字和姓氏的值,这在上面的查询中暴露了一个遗漏的场景。
  • 我还把这 6 行反复复制,扩充到超过 12K 行,因为数据模型似乎允许出现这种重复。结果上述查询运行了 2 个多小时,最后我只好放弃并终止了它。
  • 我针对我的解决方案运行了 12K 行,它在不到一秒的时间内返回了预期的结果。

所以,不用多说:

-- =================================================================================
-- BEGIN: SETUP TEST DATA
-- =================================================================================
-- Rebuilds dbo.user_details as a plain table holding six sample rows, then
-- doubles the table 11 times (6 * 2^11 = 12288 rows) for timing.
SET NOCOUNT ON;

-- Check and drop the same schema-qualified object that is created below, so the
-- guard cannot resolve to a different schema than the CREATE TABLE.
IF OBJECT_ID('dbo.user_details', 'U') IS NOT NULL DROP TABLE dbo.user_details;
GO

CREATE TABLE dbo.user_details (
    user_name   char(1) NULL,
    email       char(1) NULL,
    first_name  char(1) NULL,
    last_name   char(1) NULL
);
GO

INSERT INTO dbo.user_details (user_name, email, first_name, last_name)
SELECT seed.user_name, seed.email, seed.first_name, seed.last_name
  FROM (
        -- Zero-row anchor: supplies the derived table's column names and types.
        SELECT user_name, email, first_name, last_name
          FROM dbo.user_details
         WHERE 1 = 2
        UNION ALL SELECT 'a',   'b',    NULL,   NULL
        UNION ALL SELECT NULL,  'b',    'c',    'd'
        UNION ALL SELECT 'a',   NULL,   'e',    'f'
        UNION ALL SELECT NULL,  'x',    'y',    'z'
        UNION ALL SELECT NULL,  NULL,   'y',    'z'
        UNION ALL SELECT NULL,  NULL,   'a',    'z'
       ) AS seed;
GO

--/*
-- TURN 6 ROWS INTO OVER 12K ROWS TO TEST PERFORMANCE
-- Each pass re-inserts the whole table into itself, doubling the row count.
DECLARE @count int;
SET @count = 0;
WHILE @count < 11
  BEGIN
    INSERT INTO dbo.user_details (user_name, email, first_name, last_name)
    SELECT user_name, email, first_name, last_name
      FROM dbo.user_details;

    SET @count = @count + 1;
  END
--*/
-- =================================================================================
-- END: SETUP TEST DATA
-- =================================================================================


-- =================================================================================
-- BEGIN: NEW SOLUTION FINAL: <1sec on 12288 rows
-- =================================================================================
-- Drop leftovers from a previous run so the SELECT ... INTO statements can recreate them.
IF OBJECT_ID('tempdb..#useremail', 'U') IS NOT NULL DROP TABLE #useremail;
IF OBJECT_ID('tempdb..#email', 'U') IS NOT NULL DROP TABLE #email;
IF OBJECT_ID('tempdb..#user', 'U') IS NOT NULL DROP TABLE #user;
IF OBJECT_ID('tempdb..#name', 'U') IS NOT NULL DROP TABLE #name;


-- GET YOUR UNIQUE user_name AND email KEY
-- Every distinct (user_name, email) pair, including pairs where a side is NULL.
SELECT DISTINCT ud.user_name, ud.email
  INTO #useremail
  FROM user_details AS ud;


-- GET YOUR UNIQUE email VALUES
-- Distinct names recorded against each non-NULL email.
-- Column names are spelled exactly as declared (last_name) so the statement
-- also resolves under a case-sensitive collation.
SELECT DISTINCT ud.email, ud.first_name, ud.last_name
  INTO #email
  FROM user_details AS ud
 WHERE ud.email IS NOT NULL;


-- GET YOUR UNIQUE user_name VALUES
-- Distinct names recorded against each non-NULL user_name.
SELECT DISTINCT ud.user_name, ud.first_name, ud.last_name
  INTO #user
  FROM user_details AS ud
 WHERE ud.user_name IS NOT NULL;


-- GET YOUR UNIQUE first_name AND last_name VALUES NOT PART OF THE KEY
-- Complete names from rows that carry neither user_name nor email.
SELECT DISTINCT ud.first_name, ud.last_name
  INTO #name
  FROM user_details AS ud
 WHERE ud.first_name IS NOT NULL
   AND ud.last_name IS NOT NULL
   AND ud.user_name IS NULL
   AND ud.email IS NULL;


-- CLEAN UP YOUR UNIQUE user_name AND email KEY
-- Remove half-filled keys (one side NULL) when a fully populated key with the
-- same user_name or the same email exists.
-- Column names below are spelled as declared (last_name) so the statements also
-- resolve under a case-sensitive collation.
DELETE ue
  FROM #useremail AS ue
 WHERE EXISTS (
        SELECT 1
          FROM #useremail AS full_key
         WHERE full_key.user_name IS NOT NULL
           AND full_key.email IS NOT NULL
           AND (   (ue.email     IS NULL AND ue.user_name = full_key.user_name)
                OR (ue.user_name IS NULL AND ue.email     = full_key.email))
       );


-- CLEAN UP YOUR UNIQUE email VALUES
-- Remove nameless rows for an email that also has a row with a complete name.
DELETE e
  FROM #email AS e
 WHERE e.first_name IS NULL
   AND e.last_name IS NULL
   AND EXISTS (
        SELECT 1
          FROM #email AS named
         WHERE named.email = e.email
           AND named.first_name IS NOT NULL
           AND named.last_name IS NOT NULL
       );


-- CLEAN UP YOUR UNIQUE user_name VALUES
-- Remove nameless rows for a user_name that also has a row with a complete name.
DELETE u
  FROM #user AS u
 WHERE u.first_name IS NULL
   AND u.last_name IS NULL
   AND EXISTS (
        SELECT 1
          FROM #user AS named
         WHERE named.user_name = u.user_name
           AND named.first_name IS NOT NULL
           AND named.last_name IS NOT NULL
       );


-- CLEAN UP YOUR UNIQUE #name VALUES
-- Remove name-only rows whose name is already attached to a user_name ...
DELETE n
  FROM #name AS n
 WHERE EXISTS (
        SELECT 1
          FROM #user AS u
         WHERE u.first_name = n.first_name
           AND u.last_name  = n.last_name
       );

-- ... or to an email.
DELETE n
  FROM #name AS n
 WHERE EXISTS (
        SELECT 1
          FROM #email AS e
         WHERE e.first_name = n.first_name
           AND e.last_name  = n.last_name
       );


-- GET YOUR DATA
-- Three sources are combined; UNION (not UNION ALL) removes rows that more than
-- one branch produces.
-- 1) names attached to a user_name, expanded to that user_name's key rows
SELECT k.user_name,
       k.email,
       u.first_name,
       u.last_name
  FROM #user AS u
 INNER JOIN #useremail AS k
    ON k.user_name = u.user_name
UNION
-- 2) names attached to an email, expanded to that email's key rows
SELECT k.user_name,
       k.email,
       e.first_name,
       e.last_name
  FROM #email AS e
 INNER JOIN #useremail AS k
    ON k.email = e.email
UNION
-- 3) names left in #name, which carry neither user_name nor email
SELECT NULL AS [user_name],
       NULL AS [email],
       n.first_name,
       n.last_name
  FROM #name AS n
-- =================================================================================
-- END: NEW SOLUTION FINAL
-- =================================================================================
于 2013-03-12T05:26:41.680 回答
0

- 如果我这次是对的,您可以使用以下方法解决它:

 -- For every user_details row that has email, first_name and last_name, pair it
 -- with every user_details row and take the paired row's user_name when its own
 -- is NULL. DISTINCT collapses the repeated combinations.
 SELECT DISTINCT
        ISNULL(cur.USER_NAME, other.USER_NAME),
        cur.EMAIL,
        cur.FIRST_NAME,
        cur.LAST_NAME
   FROM user_details AS cur
  CROSS JOIN user_details AS other
  WHERE cur.LAST_NAME  IS NOT NULL
    AND cur.FIRST_NAME IS NOT NULL
    AND cur.EMAIL      IS NOT NULL
于 2013-02-12T17:36:44.753 回答
0

尝试对 user_details 使用外部联接。

于 2013-02-12T18:12:49.277 回答