因此,您可以通过取平均值来得到每件商品的估算/平均/虚拟("假的")单价:
-- Demo: split each transaction's sale_amt evenly across its item rows, round
-- each share to 2 decimal places, then re-sum the rounded shares per
-- transaction to show that the rounded parts need not add up to sale_amt.
WITH data_table AS (
    -- Inline sample data: one row per item; sale_amt is repeated on every
    -- row of the same transaction.
    SELECT *
    FROM VALUES
        (1, '2020-01-27', 517.66, 'Rocking Chair'),
        (1, '2020-01-27', 517.66, 'Plush Animal'),
        (1, '2020-01-27', 517.66, 'Rug'),
        (1, '2020-01-27', 517.66, 'Couch'),
        (1, '2020-01-27', 517.66, 'Bar Stool'),
        (2, '2020-01-28', 59.09, 'Painting'),
        (2, '2020-01-28', 59.09, 'Rug')
        AS v (trans_unique_key, fiscal_dt, sale_amt, prdct_desc)
),

per_item AS (
    SELECT
        d.*,
        -- Even share of the transaction amount per row of that transaction.
        d.sale_amt / COUNT(*) OVER (PARTITION BY d.trans_unique_key) AS fake_item_cost,
        -- Reuses the alias defined on the previous line; this relies on the
        -- engine allowing references to an earlier select-list alias
        -- (e.g. Snowflake).
        ROUND(fake_item_cost, 2) AS fake_item_cost_2dp
    FROM data_table AS d
)

SELECT
    p.*,
    -- Sum of the rounded shares per transaction; compare against sale_amt.
    SUM(p.fake_item_cost_2dp) OVER (PARTITION BY p.trans_unique_key) AS sum_of_parts_not_eqaul_the_whole
FROM per_item AS p
ORDER BY
    p.fiscal_dt,
    p.trans_unique_key;
结果如下:
TRANS_UNIQUE_KEY FISCAL_DT SALE_AMT PRDCT_DESC FAKE_ITEM_COST FAKE_ITEM_COST_2DP SUM_OF_PARTS_NOT_EQAUL_THE_WHOLE
1 2020-01-27 517.66 Rocking Chair 103.53200000 103.53 517.65
1 2020-01-27 517.66 Plush Animal 103.53200000 103.53 517.65
1 2020-01-27 517.66 Rug 103.53200000 103.53 517.65
1 2020-01-27 517.66 Couch 103.53200000 103.53 517.65
1 2020-01-27 517.66 Bar Stool 103.53200000 103.53 517.65
2 2020-01-28 59.09 Painting 29.54500000 29.55 59.10
2 2020-01-28 59.09 Rug 29.54500000 29.55 59.10
通常认为,先舍入再求和、或先取平均再求和都不是好的做法,而且浮点数本身也可能不精确。
但我的主要观点之一是:fake_per_item_price 本身并没有多大意义,除非您打算再次聚合回交易级别——
而在那种情况下,使用 ANY_VALUE 之类的函数会更合理。
-- Demo: roll the item-level rows back up to one row per transaction.
-- sale_amt is repeated on every row of a transaction in the sample data, so
-- ANY_VALUE picks it without adding it to the grouping key.
WITH data_table AS (
    -- Inline sample data: one row per item.
    SELECT *
    FROM VALUES
        (1, '2020-01-27', 517.66, 'Rocking Chair'),
        (1, '2020-01-27', 517.66, 'Plush Animal'),
        (1, '2020-01-27', 517.66, 'Rug'),
        (1, '2020-01-27', 517.66, 'Couch'),
        (1, '2020-01-27', 517.66, 'Bar Stool'),
        (2, '2020-01-28', 59.09, 'Painting'),
        (2, '2020-01-28', 59.09, 'Rug')
        AS v (trans_unique_key, fiscal_dt, sale_amt, prdct_desc)
)

SELECT
    trans_unique_key,
    fiscal_dt,
    ANY_VALUE(sale_amt) AS sale_amt,
    -- Number of item rows in the transaction.
    COUNT(*) AS total_items_count,
    -- Number of different product descriptions in the transaction.
    COUNT(DISTINCT prdct_desc) AS distinct_items_count
FROM data_table
GROUP BY
    trans_unique_key,
    fiscal_dt
ORDER BY
    fiscal_dt,
    trans_unique_key;
结果如下:
TRANS_UNIQUE_KEY FISCAL_DT SALE_AMT TOTAL_ITEMS_COUNT DISTINCT_ITEMS_COUNT
1 2020-01-27 517.66 5 5
2 2020-01-28 59.09 2 2