2

我正在构建一个将配置文件“匹配”在一起的应用程序。例如,这是我的架构的简化版本::

用户 ID
名字
姓氏

UserProfile Id
UserId
SomeOtherFields

UserProfileFields Id
UserProfileId
键值
_

UserProfile 存在以保存一些标准信息(出生日期等......)

UserProfileFields 基本上是一个键及其值的列表,以便建立一个看起来有点像这样的字典(再次,为了这个问题的目的而简化)

UserProfileID | Key       | Value       
123           | food      | Pizza
123           | food      | Indian
4453          | drink     | Coke
44850         | drink     | Orange Juice
88493         | food      | Pizza
448382        | food      | Chinese

所以,从上面,我们可以看到配置文件 123 与 food 上的 88493 匹配 - 他们都有 food|pizza

有没有一种方法可以有效地查询该表以获取“匹配项”列表

我设想它每天运行一次,结果存储在一个单独的表中

例如:

火柴

MatchID | ProfileID
1       | 123
1       | 88493

我在猜测类似的东西

-- NOTE(review): rough sketch from the question, not a runnable query.
-- Selecting * while grouping by a single column is rejected by engines that
-- require every non-aggregated column to appear in GROUP BY, and the column
-- is written as [Key] (bracket-quoted) in the table definition further down
-- this file -- presumably because KEY is a reserved word; confirm for the
-- target engine.
SELECT * FROM UserProfileFields
GROUP BY Key

查询类型...但不确定这对 100 万行的效率有多高?

4

3 回答 3

2

这应该会为您解决。

-- ============================================================================
-- BEGIN: SETUP TEST DATA
-- ============================================================================
-- Scratch key/value table that the matching queries below read from.
CREATE TABLE UserProfileFields (
    UserProfileID   int,
    [Key]           varchar(5),
    Value           varchar(12)
);


-- Seed rows: the six sample rows from the question plus two extra 'drink'
-- rows for profile 88493, so that it matches more than one other profile.
INSERT INTO UserProfileFields (UserProfileID, [Key], Value)
          SELECT 123,    'food',  'Pizza'
UNION ALL SELECT 123,    'food',  'Indian'
UNION ALL SELECT 4453,   'drink', 'Coke'
UNION ALL SELECT 44850,  'drink', 'Orange Juice'
UNION ALL SELECT 88493,  'food',  'Pizza'
UNION ALL SELECT 448382, 'food',  'Chinese'
UNION ALL SELECT 88493,  'drink', 'Coke'
UNION ALL SELECT 88493,  'drink', 'Orange Juice';

--/*
-- Turn 8 records into 1,048,576: every pass re-inserts the whole table into
-- itself, doubling the row count, and 8 * 2^17 = 1,048,576.
-- (The dashed markers around this section are a toggle: removing the two
-- leading dashes from the opening marker comments the whole loop out.)
DECLARE @PassesLeft int;
SET @PassesLeft = 17;

WHILE @PassesLeft > 0
BEGIN
    INSERT INTO UserProfileFields (UserProfileID, [Key], Value)
    SELECT UserProfileID, [Key], Value
      FROM UserProfileFields;

    SET @PassesLeft = @PassesLeft - 1;
END;
--*/
-- Row-count sanity check (run manually):
-- SELECT COUNT(*) FROM UserProfileFields WITH (NOLOCK)
-- ============================================================================
-- END: SETUP TEST DATA
-- ============================================================================




-- ============================================================================
-- BEGIN: Solution if Key, Value, and UserProfileID do NOT make up a unique key
-- ============================================================================
-- Purpose: list every profile that shares a (Key, Value) pair with at least
-- one other profile, and give each shared pair its own MatchID.
--
-- Steps:
--   1. #DistinctValues: collapse duplicate (Key, UserProfileID, Value) rows.
--   2. #Matches: keep the rows whose (Key, Value) also occurs for a different
--      UserProfileID.
--   3. Final SELECT: number the shared (Key, Value) pairs with DENSE_RANK so
--      that all profiles sharing a pair receive the same MatchID.
SET NOCOUNT ON;
IF OBJECT_ID('tempdb..#DistinctValues', 'U') IS NOT NULL DROP TABLE #DistinctValues;
IF OBJECT_ID('tempdb..#Matches', 'U') IS NOT NULL DROP TABLE #Matches;

-- GROUP BY is used purely for de-duplication here (no aggregates).
SELECT [Key], UserProfileID, Value
  INTO #DistinctValues
  FROM UserProfileFields WITH (NOLOCK)
 GROUP BY [Key], UserProfileID, Value;

-- A semi-join (EXISTS) instead of a self-join: a self-join on
-- UserProfileID <> UserProfileID returns each profile once per matching
-- partner, so a pair shared by n profiles would produce n * (n - 1) rows
-- rather than n. EXISTS returns each qualifying row exactly once.
SELECT A.[Key], A.Value, A.UserProfileID
  INTO #Matches
  FROM #DistinctValues A
 WHERE EXISTS (
               SELECT 1
                 FROM #DistinctValues B
                WHERE B.[Key]          = A.[Key]
                  AND B.Value          = A.Value
                  AND B.UserProfileID <> A.UserProfileID
              );

-- One MatchID per distinct (Key, Value) pair; DENSE_RANK leaves no gaps.
SELECT DENSE_RANK() OVER(ORDER BY A.[Key], A.Value) [MatchID]
      ,A.UserProfileID
      ,A.[Key]
      ,A.Value
  FROM #Matches A;
-- ============================================================================
-- END: Solution if Key, Value, and UserProfileID do NOT make up a unique key
-- ============================================================================




-- ============================================================================
-- BEGIN: Solution if Key, Value, and UserProfileID make up a unique key
-- ============================================================================
-- Purpose: list every profile that shares a (Key, Value) pair with at least
-- one other profile, and give each shared pair its own MatchID. Because the
-- three columns are assumed unique together, no de-duplication pass is needed
-- and the base table is queried directly.
--
-- NOTE(review): SQL Server documents that temporary tables created within a
-- single batch must have different names, so run this section as its own
-- batch if another section that also creates #Matches is present -- confirm
-- on the target version.
IF OBJECT_ID('tempdb..#Matches', 'U') IS NOT NULL DROP TABLE #Matches;

-- A semi-join (EXISTS) instead of a self-join: a self-join on
-- UserProfileID <> UserProfileID returns each profile once per matching
-- partner, so a pair shared by n profiles would produce n * (n - 1) rows
-- rather than n. EXISTS returns each qualifying row exactly once.
SELECT A.[Key], A.Value, A.UserProfileID
  INTO #Matches
  FROM UserProfileFields A WITH (NOLOCK)
 WHERE EXISTS (
               SELECT 1
                 FROM UserProfileFields B WITH (NOLOCK)
                WHERE B.[Key]          = A.[Key]
                  AND B.Value          = A.Value
                  AND B.UserProfileID <> A.UserProfileID
              );

-- One MatchID per distinct (Key, Value) pair; DENSE_RANK leaves no gaps.
SELECT DENSE_RANK() OVER(ORDER BY A.[Key], A.Value) [MatchID]
      ,A.UserProfileID
      ,A.[Key]
      ,A.Value
  FROM #Matches A;
-- ============================================================================
-- END: Solution if Key, Value, and UserProfileID make up a unique key
-- ============================================================================
于 2013-03-15T06:03:15.927 回答
1

将选项与 EXISTS() 运算符和覆盖索引一起使用。这将有助于避免过度的数据排序。

-- Covering index for the match query below: seeks on ([Key], Value) and
-- carries UserProfileID in the leaf so the base table is never touched.
CREATE INDEX ix_Key_Value_UserProfileFields ON dbo.UserProfileFields([Key], Value) INCLUDE(UserProfileID);

-- Returns (MatchID, UserProfileID) for every row whose (Key, Value) pair is
-- also held by a different profile. All rows sharing a pair get the same
-- MatchID.
--
-- The EXISTS predicate tests for a *different* UserProfileID rather than
-- counting rows with HAVING COUNT(*) > 1: a row count also includes the outer
-- row itself, so a profile that merely has the same (Key, Value) stored twice
-- would be reported as a match with itself, and the count has to read the
-- whole group where EXISTS can stop at the first other profile.
SELECT DENSE_RANK() OVER(ORDER BY t.[Key], t.Value) AS MatchID, t.UserProfileID
FROM dbo.UserProfileFields t
WHERE EXISTS (
              SELECT 1
              FROM dbo.UserProfileFields t2
              WHERE t2.[Key] = t.[Key]
                AND t2.Value = t.Value
                AND t2.UserProfileID <> t.UserProfileID
              );

在此处输入图像描述

SQLFiddle上的演示

于 2013-03-15T08:25:27.910 回答
1
-- Matches: every row whose (Key, Value) pair is held by more than one
-- distinct profile, tagged with a MatchID that identifies the shared pair.
WITH Matches
AS
(
    SELECT  a.UserProfileID,
            a.[Key],
            a.Value,
            -- Rank by the full (Key, Value) pair. Ranking by Key alone would
            -- give e.g. food|Pizza and food|Indian the same MatchID and merge
            -- unrelated groups of profiles.
            DENSE_RANK() OVER(ORDER BY a.[Key], a.Value) MatchID
    FROM    UserProfileFields a
            INNER JOIN
            (
                -- (Key, Value) pairs shared by at least two different profiles.
                SELECT  [Key], Value
                FROM    UserProfileFields 
                GROUP   BY [Key], Value
                HAVING  COUNT(DISTINCT UserProfileID) > 1
            ) b ON  a.[Key] = b.[Key] AND
                    a.Value = b.Value
)
SELECT  MatchID, UserProfileID
FROM    Matches
于 2013-03-15T01:10:37.827 回答