1

我有一个记录事件的表:

-- Event log: one row per event recorded for an item and a user.
CREATE TABLE #events
(
    intRowId  INT IDENTITY(1,1),  -- auto-incrementing row id, starting at 1
    intItemId INT,                -- item the event refers to
    intUserId INT,                -- user the event was recorded for
    datEvent  DATETIME            -- timestamp of the event
)

这是一个包含数百万行的大表,记录了针对数千个项目和数万用户的事件。

我想查找一组选定的十个 itemID,但仅限于它们按特定模式出现的情况:我要找出这十个项目都针对同一个用户 ID 记录了事件、并且这些事件在时间上彼此接近(比如相隔不超过 5 分钟)的那些行。

我完全不知道该怎么做。我猜其中某处会用到分区(partition),如果能得到任何帮助——哪怕只是告诉我该从哪里入手——我都将不胜感激。

干杯,马特

4

2 回答 2

1

假设您想要关于项目 id 的统计信息:1、2、...、10。首先创建一个表 EventByItems:

-- Pivoted view of the event log: one counter column per tracked item id
-- (items 1 through 10), keyed by user and event timestamp.
-- All ten item columns are spelled out so the statement can be executed as-is.
CREATE TABLE EventByItems
(
   intRowId  int identity(1,1),  -- auto-incrementing row id
   intUserId int,                -- user the counters belong to
   datEvent  datetime,           -- event timestamp the counters belong to
   intItem1  int,                -- number of events for item 1
   intItem2  int,
   intItem3  int,
   intItem4  int,
   intItem5  int,
   intItem6  int,
   intItem7  int,
   intItem8  int,
   intItem9  int,
   intItem10 int                 -- number of events for item 10
)

然后使用查询来填充此表:

-- Populate EventByItems from the raw event log.
-- PIVOT turns the item ids 1..10 into one count column each; the outer
-- SUM / GROUP BY then collapses the result to a single row per user and
-- event timestamp.
-- The INSERT target list is explicit so the identity column intRowId is
-- generated by the table and the counters land in the matching columns.
INSERT INTO EventByItems
(
    intUserId,
    datEvent,
    intItem1, intItem2, intItem3, intItem4, intItem5,
    intItem6, intItem7, intItem8, intItem9, intItem10
)
SELECT
    pvt.intUserId,
    pvt.datEvent,
    SUM(pvt.[1]), SUM(pvt.[2]), SUM(pvt.[3]), SUM(pvt.[4]), SUM(pvt.[5]),
    SUM(pvt.[6]), SUM(pvt.[7]), SUM(pvt.[8]), SUM(pvt.[9]), SUM(pvt.[10])
FROM #events
PIVOT
(
    COUNT(intItemId)
    FOR intItemId IN ([1], [2], [3], [4], [5], [6], [7], [8], [9], [10])
) AS pvt
GROUP BY pvt.intUserId, pvt.datEvent

现在我们可以对那个表做一些工作。例如,我们可以根据您的逻辑对其进行更新以填补空白。或者我们可以做这样的查询:

-- Return the EventByItems rows for which each of the ten items has an event
-- for the same user, either in the row itself or in another row whose
-- timestamp lies within 5 minutes before or after it.
--
-- The time window is written as a closed range on N.datEvent instead of
-- DATEDIFF(MINUTE, N.datEvent, E.datEvent) <= 5: that expression is negative
-- for every later event, so it accepted events arbitrarily far in the future.
-- The range form is symmetric, measures elapsed time rather than
-- minute-boundary crossings, and leaves datEvent usable by an index.
SELECT
  E.intRowId,
  E.intUserId,
  E.datEvent
FROM
  EventByItems AS E
WHERE
  (E.intItem1 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem1 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem2 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem2 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem3 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem3 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem4 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem4 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem5 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem5 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem6 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem6 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem7 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem7 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem8 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem8 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem9 > 0 OR EXISTS (SELECT *
                             FROM EventByItems AS N
                             WHERE N.intUserId = E.intUserId
                               AND N.intItem9 > 0
                               AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                               AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                               AND N.intRowId <> E.intRowId))
  AND
  (E.intItem10 > 0 OR EXISTS (SELECT *
                              FROM EventByItems AS N
                              WHERE N.intUserId = E.intUserId
                                AND N.intItem10 > 0
                                AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                                AND N.datEvent <= DATEADD(MINUTE,  5, E.datEvent)
                                AND N.intRowId <> E.intRowId))
于 2013-05-21T14:44:36.013 回答
0

好的,下面是一个可以满足你要求的可运行示例。我假设这些事件不必恰好十个一组出现(示例中只使用了项目 1、2、3)。

但是该解决方案非常粗糙,并且运行缓慢,尤其是在项目/用户数量增加时。无论如何,请不要因此给我投太多反对票,我希望你能找到一个合适的解决方案 :)。

带有预选事件的临时表将有助于提高我的解决方案的性能,但您真正需要的是像 Oracle 中的窗口函数。

-- Recreate the sample #events table from scratch so the script can be re-run.
-- The drop is guarded: a bare DROP TABLE raises an error when #events does
-- not exist yet, e.g. the first time the script runs in a session.
IF OBJECT_ID('tempdb..#events') IS NOT NULL
    DROP TABLE #events
GO

-- Event log: one row per event recorded for an item and a user.
create table #events
(  intRowId int identity(1,1),   -- auto-incrementing row id
   intItemId int,                -- item the event refers to
   intUserId int,                -- user the event was recorded for
   datEvent datetime)            -- timestamp of the event
GO

-- Sample events for user 1. Value order follows the column list:
-- user id, item id, event timestamp.
-- The ids are written as integer literals (the columns are int) and the
-- timestamps use the ISO 8601 'YYYY-MM-DDThh:mm:ss' form, which SQL Server
-- reads the same way regardless of the session's language / DATEFORMAT.
-- The "--groupN" markers are the author's annotation of the rows that are
-- expected to be matched together by the queries that follow.
insert into #events (intUserId, intItemId, datEvent)
select 1, 1, '2013-05-01T10:25:00' union all --group1
select 1, 2, '2013-05-01T10:25:00' union all --group1
select 1, 3, '2013-05-01T10:26:00' union all --group1
select 1, 7, '2013-05-01T10:25:00' union all
select 1, 8, '2013-05-01T10:25:00' union all
select 1, 9, '2013-05-01T10:26:00' union all
select 1, 1, '2013-05-01T10:50:00' union all --group2
select 1, 2, '2013-05-01T10:52:00' union all --group2
select 1, 3, '2013-05-01T10:59:00' union all
select 1, 1, '2013-05-01T11:10:00' union all --group3
select 1, 1, '2013-05-01T11:12:00' union all --group3
select 1, 3, '2013-05-01T11:17:00' union all --group3
select 1, 2, '2013-05-01T11:25:00' union all
select 1, 1, '2013-05-01T11:31:00' union all
select 1, 7, '2013-05-01T11:32:00' union all
select 1, 2, '2013-05-01T11:50:00' union all --group4
select 1, 2, '2013-05-01T11:50:00' union all --group4
select 1, 3, '2013-05-01T11:50:00' union all --group4
select 1, 1, '2013-05-01T11:56:00'
GO

-- Build #temp: every pair (e1, e2) of distinct events of user 1 on items
-- 1, 2 or 3 where e2 occurs no earlier than e1 and at most 5 minutes after it.
-- The drop is guarded so the script also works when #temp does not exist yet.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp
GO
select
     e1.intRowId as intRowId_1, e1.intItemId as intItemId_1, e1.intUserId as intUserId_1, e1.datEvent as datEvent_1
    ,e2.intRowId as intRowId_2, e2.intItemId as intItemId_2, e2.intUserId as intUserId_2, e2.datEvent as datEvent_2
into #temp
from #events e1
join #events e2
    on  e1.intUserId = e2.intUserId
    and e1.intRowId <> e2.intRowId
    -- Time window as a range on e2.datEvent: measures elapsed time (DATEDIFF
    -- counts minute boundaries, so it would also accept gaps of up to 5:59)
    -- and uses the column's declared casing, which matters under a
    -- case-sensitive collation.
    and e2.datEvent >= e1.datEvent
    and e2.datEvent <= dateadd(minute, 5, e1.datEvent)
where e1.intUserId = 1              -- e2's user is already pinned by the join condition
    and e1.intItemId in (1,2,3)
    and e2.intItemId in (1,2,3)
-- No ORDER BY: row order is not preserved by SELECT ... INTO, and the
-- consumer of #temp sorts its own output.
GO

-- List every event that appears on either side of a pair stored in #temp,
-- once each. UNION (without ALL) removes the duplicates that occur when the
-- same event takes part in several pairs.
select
    intRowId_1  as intRowId,
    intItemId_1 as intItemId,
    intUserId_1 as intUserId,
    datEvent_1  as datEvent
from #temp
union
select
    intRowId_2,
    intItemId_2,
    intUserId_2,
    datEvent_2
from #temp
order by
    datEvent,
    intRowId
于 2013-05-21T14:56:16.063 回答