2

我们有一个聊天系统,有时每秒会为聊天期间的每个事件生成多个事件日志。问题是这些会消耗大量数据存储(在该平台上非常昂贵),我们希望简化我们实际存储的内容并删除真正不需要的内容。

为此,有一种事件类型专门记录聊天在队列中的位置。只要中间没有穿插该聊天的其他事件,我们并不关心每一条位置记录。因此,对于每一段中间没有其他事件类型的连续记录,我们只想保留第一条和最后一条,以便得到该时段的“排队总时间”。

更复杂的是,客户在部门之间被转接时可能多次进出队列,因此同一个聊天(SAME CHAT)可能包含多段这样的队列位置记录。我尝试过使用 FIRST_VALUE 和 LAST_VALUE,它基本上能解决问题,但当同一个聊天中出现两段不同的此类事件时就失效了。

这是生成测试数据的脚本:

<!-- language: lang-sql -->
    -- Temp table holding sample chat events for a single chat transcript.
    -- In these sample rows groupid is the transcript id followed by the event type.
    CREATE TABLE #testdata
    (
        id                   varchar(18),
        name                 varchar(8),
        [type]               varchar(20),
        livechattranscriptid varchar(18),
        groupid              varchar(40)
    );

    -- Ordered by name, the Enqueue events form three separate runs (4, 1 and 1
    -- rows), each interrupted by a PushAssignment event.
    INSERT INTO #testdata
        (id, name, [type], livechattranscriptid, groupid)
    VALUES
        ('0DZ14000003I2pOGAS', '34128314', 'ChatRequest',    '57014000000ltfIAAQ', '57014000000ltfIAAQChatRequest'),
        ('0DZ14000003IGmQGAW', '34181980', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
        ('0DZ14000003IHbqGAG', '34185171', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
        ('0DZ14000003ILuHGAW', '34201743', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
        ('0DZ14000003IQ6cGAG', '34217778', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
        ('0DZ14000003IR7JGAW', '34221794', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
        ('0DZ14000003IiDnGAK', '34287448', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
        ('0DZ14000003IiDoGAK', '34287545', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
        ('0DZ14000003Iut5GAC', '34336044', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
        ('0DZ14000003Iv7HGAS', '34336906', 'Accept',         '57014000000ltfIAAQ', '57014000000ltfIAAQAccept');

这是我用来找出每个分组的第一个和最后一个 id 的查询,记录按 name 字段排序,并按 transcriptid 分组:

-- For every row, report the first and the last id found in its groupid partition.
-- All rows sharing a groupid land in the same partition, whether or not they
-- are contiguous when ordered by name.
SELECT
    *,
    FIRST_VALUE(id) OVER (
        PARTITION BY groupid
        ORDER BY livechattranscriptid, name ASC
    ) AS firstinstancegroup,
    LAST_VALUE(id) OVER (
        PARTITION BY groupid
        ORDER BY livechattranscriptid, name ASC
        -- Frame runs to the end of the partition so LAST_VALUE sees every later row.
        RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS lastinstancegroup
FROM #testdata
ORDER BY
    livechattranscriptid,
    name

问题是,它为同一个 groupid 下的所有记录返回了相同的第一个和最后一个 id,而没有把每一段连续的 Enqueue 记录视为独立的组。怎样才能把每一段连续的 Enqueue 记录当作一个独立的组来处理?

4

1 回答 1

0

这里有一个类似的解决方案:《Grouping contiguous table data》

写法不算漂亮,但其逻辑基于上面那个问题(OP)的思路:处理同一列上的连续数据。

-- Table variable holding sample chat events for a single chat transcript.
-- In these sample rows groupid is the transcript id followed by the event type.
DECLARE @mytable TABLE
(
    id                   varchar(18),
    name                 varchar(8),
    [type]               varchar(20),
    livechattranscriptid varchar(18),
    groupid              varchar(100)
);

-- Ordered by name, the Enqueue events form three separate runs (4, 1 and 1
-- rows), each interrupted by a PushAssignment event.
INSERT INTO @mytable
    (id, name, [type], livechattranscriptid, groupid)
VALUES
    ('0DZ14000003I2pOGAS', '34128314', 'ChatRequest',    '57014000000ltfIAAQ', '57014000000ltfIAAQChatRequest'),
    ('0DZ14000003IGmQGAW', '34181980', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IHbqGAG', '34185171', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003ILuHGAW', '34201743', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IQ6cGAG', '34217778', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IR7JGAW', '34221794', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003IiDnGAK', '34287448', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IiDoGAK', '34287545', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003Iut5GAC', '34336044', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003Iv7HGAS', '34336906', 'Accept',         '57014000000ltfIAAQ', '57014000000ltfIAAQAccept');


-- Collapse every run of consecutive events that share a groupid (within one
-- chat transcript, ordered by name) into a single row that carries the id of
-- the first and of the last event of that run.
--
-- Result columns:
--   startid, name, type, livechattranscriptid  -- taken from the first event of the run
--   lastid                                     -- id of the last event of the run
--
-- Runs are detected per livechattranscriptid, so events of different chats
-- whose name values interleave cannot split each other's runs.
-- NOTE(review): name is a varchar column, so ordering is by string comparison;
-- this assumes equal-width digit strings as in the sample rows -- confirm.
;WITH flagged AS (
    -- Mark the first event of every run: the previous event of the same chat
    -- has a different groupid, or there is no previous event (LAG yields NULL,
    -- so the equality is not true and the ELSE branch applies).
    SELECT
        id,
        name,
        [type],
        livechattranscriptid,
        CASE
            WHEN groupid = LAG(groupid) OVER (
                     PARTITION BY livechattranscriptid
                     ORDER BY name
                 ) THEN 0
            ELSE 1
        END AS is_run_start
    FROM @mytable
),
numbered AS (
    -- A running count of run starts gives each event the ordinal of its run.
    SELECT
        id,
        name,
        [type],
        livechattranscriptid,
        SUM(is_run_start) OVER (
            PARTITION BY livechattranscriptid
            ORDER BY name
            ROWS UNBOUNDED PRECEDING
        ) AS run_no
    FROM flagged
),
bounded AS (
    -- Attach to every event its position inside the run and the run's last id.
    SELECT
        id,
        name,
        [type],
        livechattranscriptid,
        ROW_NUMBER() OVER (
            PARTITION BY livechattranscriptid, run_no
            ORDER BY name
        ) AS pos_in_run,
        LAST_VALUE(id) OVER (
            PARTITION BY livechattranscriptid, run_no
            ORDER BY name
            -- Whole-partition frame: the default frame stops at the current row.
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS lastid
    FROM numbered
)
SELECT
    id AS startid,
    name,
    [type],
    livechattranscriptid,
    lastid
FROM bounded
WHERE pos_in_run = 1          -- keep one row per run: its first event
ORDER BY
    livechattranscriptid,
    name;
于 2018-01-26T01:07:13.420 回答