0

我的表结构看起来与此类似

Customer_id Country item_type   Order_Size  Dates      Codes
A401           US   Fruit        Small       3/14/2016  11
A401           US   Fruit        Big         5/22/2016  12
A401           US   Vegetable   Small        7/12/2016  11
B509           US   Vegetable   Small        3/25/2015  92
B509           US   Vegetable   Big          3/15/2014  11
B509           US   Vegetable   Small        3/1/2014   34
A402           CA   Fruit       Small        3/14/2016  56
A402           CA   Fruit       Big          5/22/2016  76
A402           CA   Fruit       Small       7/12/2016   85
A403           CA   Vegetable   Small       7/12/2016   11
A403           CA   Vegetable   Small       3/25/2015   16
A403           CA   Vegetable   Big         3/15/2014   17
A403           CA   Vegetable   Small       3/1/2014    12

我想统计每个国家/地区中,每个 item_type 下在购买过 Order_Size = Big 的订单之后再次购买的回头客数量,并且只统计 Order_Size <> Big 的购买记录。为了实现这一点,我编写了这段代码。

 -- Per country and item type, count the customers that have at least one
 -- order dated after their earliest 'Big' order of that item type.
 SELECT
     Country,
     item_type,
     count(customer_id)
 FROM (
     -- One row per customer / item type having orders after the first Big one.
     SELECT
         Country,
         customer_id,
         src.item_type,
         count(*) AS REPEATS
     FROM (
         -- Tag every order with the date of the earliest Big order in its
         -- customer / item type partition (NULL when there is none).
         SELECT
             o.*,
             min(CASE WHEN Order_Size = 'Big' THEN dates END)
                 OVER (PARTITION BY customer_id, item_type) AS min_big
         FROM data_test AS o
     ) src
     WHERE dates > min_big
     GROUP BY Country, customer_id, src.item_type
 ) D
 GROUP BY Country, item_type

结果:

Country item_type   Count(Distinct(Customer_id))
CA  Vegetable   1
US  Vegetable   1
CA  Fruit   1

这段代码现在可以正常工作,但我想再增加一个条件:仅当 Codes 出现在另一张表中满足特定条件的记录里时才计入。因此我需要在 CASE 中组合多个条件,其中一个是子查询,于是我把代码修改为:

-- Attempted variant: a Big order only sets min_big when its Codes value is
-- returned by the subquery on table1 (rows with type='TRUE').
-- This is the statement the question reports as failing with an
-- "illegal expression in WHEN clause of CASE" error; it is kept as written.
SELECT Country,item_type,count(customer_id) from
   (select Country,customer_id, t.item_type, count(*)  as REPEATS
     from (select t.*,
         -- the IN (subquery) inside the CASE ... WHEN is the part being rejected
         min(case when (Order_Size = 'Big' and Codes IN (SELECT CODES from table1 where type='TRUE' group by 1)) then dates end) over (partition by customer_id, item_type) as min_big
  from data_test as t
 ) t
where dates > min_big
group by 1,2,3) D
group by 1,2

这会引发错误:CASE 表达式的 WHEN 子句中存在非法表达式。我还读到,在 CASE 中既不能使用子查询,也不能使用 IN。我已经阅读了许多与此相关的其他问题,但仍然不清楚如何避免在 CASE 中使用子查询。由于我的表非常大,应该如何修改代码,使其既不报错又能快速执行?

4

1 回答 1

0

您可以修改代码,在应用其他规则之前,先连接到那张决定“该代码是否应被包含”的表。请确保外连接的是在 Codes 字段上取值唯一的数据集,以防止连接后产生重复行。

这种方法在应用所有其他规则之前就把该条件的结果带入查询,并且不会引发错误。下面我创建了一个在 Codes 列上具有唯一主索引的 volatile 表来实现这一点,但您也可以连接一个派生表并获得类似的结果。

-- Volatile table holding the sample orders from the question.
CREATE VOLATILE TABLE vt_fruit_exp (
    Customer_id CHAR(4),
    Country     CHAR(2),
    item_type   VARCHAR(20),
    Order_Size  CHAR(5),
    Dates       DATE,
    Codes       BYTEINT
)
PRIMARY INDEX (Customer_id)
ON COMMIT PRESERVE ROWS;

-- Load the thirteen sample orders.
-- Columns are listed explicitly so the statements do not depend on the table's
-- column order, and dates use ANSI DATE literals so they do not depend on the
-- session's date format for implicit string-to-date conversion.
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A401', 'US', 'Fruit',     'Small', DATE '2016-03-14', 11);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A401', 'US', 'Fruit',     'Big',   DATE '2016-05-22', 12);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A401', 'US', 'Vegetable', 'Small', DATE '2016-07-12', 11);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('B509', 'US', 'Vegetable', 'Small', DATE '2015-03-25', 92);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('B509', 'US', 'Vegetable', 'Big',   DATE '2014-03-15', 11);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('B509', 'US', 'Vegetable', 'Small', DATE '2014-03-01', 34);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A402', 'CA', 'Fruit',     'Small', DATE '2016-03-14', 56);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A402', 'CA', 'Fruit',     'Big',   DATE '2016-05-22', 76);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A402', 'CA', 'Fruit',     'Small', DATE '2016-07-12', 85);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A403', 'CA', 'Vegetable', 'Small', DATE '2016-07-12', 11);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A403', 'CA', 'Vegetable', 'Small', DATE '2015-03-25', 16);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A403', 'CA', 'Vegetable', 'Big',   DATE '2014-03-15', 17);
INSERT INTO vt_fruit_exp (Customer_id, Country, item_type, Order_Size, Dates, Codes) VALUES ('A403', 'CA', 'Vegetable', 'Small', DATE '2014-03-01', 12);

-- Lookup of codes and their inclusion flag. The unique primary index on Codes
-- allows at most one row per code, so joining orders to this table on Codes
-- cannot multiply order rows.
CREATE VOLATILE TABLE Table1 (
    Codes       BYTEINT,
    Code_In_flg BYTEINT
)
UNIQUE PRIMARY INDEX (Codes)
ON COMMIT PRESERVE ROWS;

-- Columns are named explicitly so the inserts do not rely on column order.
INSERT INTO Table1 (Codes, Code_In_flg) VALUES (11, 1);
INSERT INTO Table1 (Codes, Code_In_flg) VALUES (76, 1);
INSERT INTO Table1 (Codes, Code_In_flg) VALUES (12, 1);

-- Each country-> how many repeated customers for each item_type are present AFTER they purchased Order_size=Big.  Only items purchased with order_size<>Big
-- Country item_type   Count(Distinct(Customer_id))
-- CA  Vegetable   1
-- US  Vegetable   1
-- CA  Fruit       1

-- Baseline: per country and item type, count the customers that have at least
-- one order dated after their earliest Big order of that item type.
SELECT
  Country
, item_type
, count(customer_id)
FROM (
  -- One row per customer / item type having orders after the first Big one.
  SELECT Country, customer_id, t.item_type, count(*) AS REPEATS
  FROM (
    SELECT
      t.*
      -- 'Big' is spelled exactly as stored in vt_fruit_exp so the match also
      -- holds when string comparison is case specific (e.g. ANSI session mode).
    , Min(CASE WHEN Order_Size = 'Big' THEN Dates END)
        OVER (PARTITION BY Customer_Id, Item_Type) AS Min_Big
    FROM vt_fruit_exp AS t
  ) t
  -- Partitions without a Big order have a NULL Min_Big and are filtered out here.
  WHERE dates > min_big
  GROUP BY Country, customer_id, t.item_type
) D
GROUP BY Country, item_type;

-- This works now but I wanted to add one more condition as to only when the codes are within certain table with condition so I wanted to add multiple conditions with one being subquery with the case when I modified my code.

-- Use a join to the table that says whether a code is to be included, instead of attempting a subquery within the ordered analytic function.
-- Same count as the baseline query, but a Big order only qualifies as the
-- starting point when its code is flagged (Code_In_flg = 1) in Table1.
SELECT
  Country
, item_type
, count(customer_id)
FROM (
  -- One row per customer / item type having orders after the first qualifying Big one.
  SELECT Country, customer_id, t.item_type, count(*) AS REPEATS
  FROM (
    SELECT
      t.*
      -- 'Big' is spelled exactly as stored so the match also holds when string
      -- comparison is case specific. Orders whose code has no Table1 row get a
      -- NULL flag from the outer join and therefore never set Min_Big.
    , Min(CASE WHEN Order_Size = 'Big' AND b.Code_In_flg = 1 THEN Dates END)
        OVER (PARTITION BY Customer_Id, Item_Type) AS Min_Big
    FROM vt_fruit_exp AS t
    LEFT OUTER JOIN Table1 AS b
      ON t.Codes = b.Codes
  ) t
  -- Partitions without a qualifying Big order have a NULL Min_Big and drop out.
  WHERE dates > min_big
  GROUP BY Country, customer_id, t.item_type
) D
GROUP BY Country, item_type;
于 2018-02-17T03:06:49.207 回答