我有一个查询,正尝试在 EMR 的 PrestoDB 上运行。它读取的是 S3 上的 AWS Glue ORC 表。我收到了“查询失败:语句太大(分析期间堆栈溢出)”(原文:Query failed: statement is too large (stack overflow during analysis))错误,但查询本身并不算长,只有 1364 行。它在 Athena 中运行正常,只有在改为直接在 PrestoDB 上运行时才会失败。
查询共 1364 行,大小约为 144 KB(下面的示例更短,因为我不得不对一些对象做匿名化处理)。
我是在 DataGrip IDE 中通过 SSH 代理跳转(proxy jump)来运行它的。我想知道这是否可能与某些 HTTP 问题有关?
下面是一个查询示例;由于 StackOverflow 有 30000 个字符的限制,实际的查询比它更长。
-- Creates new_schema.test (ORC) from schema.table, keeping rows whose
-- snapshot_date is after 2019-01-01. For every source row it emits:
--   * the dimension columns,
--   * five concatenated "window partition" key strings,
--   * b_count metrics computed with row-based window frames.
--
-- Window conventions used throughout:
--   * Every window is partitioned by grain_window_partition and ordered by
--     snapshot_date only. The partition key is constant inside a partition,
--     so it is not repeated as a sort key.
--   * "rows between N preceding and N preceding" is a single-row frame: it
--     reads b_count from the row N positions earlier in the partition and
--     yields NULL when that row does not exist.
--   * Frames count ROWS, not calendar months.
--     NOTE(review): the month-based metric names only hold if each
--     grain_window_partition has exactly one row per month with no gaps --
--     TODO confirm against the source table.
--   * NOTE(review): grain_window_partition is not derived in this statement;
--     presumably it is a column of schema.table -- verify.
create table new_schema.test
with (format = 'ORC')
as (
    select distinct
        sr2_ln
        , sr3_ln
        , sr4_ln
        , sr5_ln
        , sr2_lin
        , sr3_lin
        , sr4_lin
        , sr5_lin
        , is_t1_t3
        , is_t4_t7
        , is_t8
        , is_t10
        , egroup
        , is_f
        , is_v
        , is_d
        , is_t
        , is_b
        , ry
        , snapshot_month_int
        , snapshot_year
        -- Window partition keys: dimension values concatenated in a fixed
        -- order, with '_' substituted for NULL, followed by snapshot_date
        -- rendered as varchar.
        -- NOTE(review): no separator is placed between the parts, so two
        -- different dimension combinations could concatenate to the same
        -- string. Left unchanged because altering it would change the stored
        -- key values.
        , (
            coalesce(sr2_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as ol2_ry_window_partition
        , (
            coalesce(sr2_lin, '_')
            || coalesce(sr3_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as ol3_ry_window_partition
        , (
            coalesce(sr2_lin, '_')
            || coalesce(sr3_lin, '_')
            || coalesce(sr4_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as ol4_ry_window_partition
        , (
            coalesce(sr2_lin, '_')
            || coalesce(sr3_lin, '_')
            || coalesce(sr4_lin, '_')
            || coalesce(sr5_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(is_f, '_')
            || coalesce(is_v, '_')
            || coalesce(is_d, '_')
            || coalesce(is_t, '_')
            || coalesce(is_b, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as e_all_up_window_partition
        -- Same as e_all_up_window_partition, but with egroup in place of is_f.
        , (
            coalesce(sr2_lin, '_')
            || coalesce(sr3_lin, '_')
            || coalesce(sr4_lin, '_')
            || coalesce(sr5_lin, '_')
            || coalesce(is_t1_t3, '_')
            || coalesce(is_t4_t7, '_')
            || coalesce(is_t8, '_')
            || coalesce(is_t10, '_')
            || coalesce(egroup, '_')
            || coalesce(is_v, '_')
            || coalesce(is_d, '_')
            || coalesce(is_t, '_')
            || coalesce(is_b, '_')
            || coalesce(ry, '_')
            || cast(snapshot_date as varchar)
        ) as f_all_up_window_partition
        -- b metrics
        , b_count as b_current_month
        -- b_count from the row 1 position back / 12 positions back.
        , sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding) as b_1_month_prior_original
        , sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding) as b_1_year_prior
        -- Average over the current row and the (snapshot_month_int - 1) rows
        -- before it; month 1 is just the current b_count.
        , case
            when snapshot_month_int = 1 then b_count
            when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
            when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
            when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
            when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
            when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
            when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
            when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
            when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
            when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
            when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
            when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
        end as b_avg_ytd
        -- The b_avg_ytd frame shifted back by 12 rows: from
        -- (11 + snapshot_month_int) preceding through 12 preceding.
        , case
            when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
            when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 12 preceding)
            when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 12 preceding)
            when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 12 preceding)
            when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 12 preceding)
            when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 12 preceding)
            when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 12 preceding)
            when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 12 preceding)
            when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 12 preceding)
            when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 12 preceding)
            when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 12 preceding)
            when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 12 preceding)
        end as b_avg_ly_ytd
        -- Average over the current row and the 11 rows before it.
        , avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row) as b_avg_ttm_as_of_snapshot_date
        -- b_count from the row snapshot_month_int positions back.
        , case
            when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
            when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
            when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
            when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
            when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
            when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
            when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
            when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
            when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
            when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
            when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
            when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
        end as b_end_of_ly
        -- The "_for_rt" metrics below repeat the patterns above with the
        -- offsets rotated so that month 4 is the first period of the year
        -- (month 4 -> 0 rows back, ..., month 3 -> 11 rows back).
        -- NOTE(review): what "rt" stands for is not visible here -- confirm.
        , case
            when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
            when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
            when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
            when snapshot_month_int = 4 then b_count
            when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
            when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
            when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
            when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
            when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
            when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
            when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
            when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
        end as b_avg_ytd_for_rt
        -- The b_avg_ytd_for_rt frame shifted back by 12 rows (frame always
        -- ends at 12 preceding).
        , case
            when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 12 preceding)
            when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 12 preceding)
            when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 12 preceding)
            when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
            when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 12 preceding)
            when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 12 preceding)
            when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 12 preceding)
            when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 12 preceding)
            when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 12 preceding)
            when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 12 preceding)
            when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 12 preceding)
            when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 12 preceding)
        end as b_avg_ly_ytd_for_rt
        -- b_count from the row 12 positions before the month-4 row that
        -- b_beg_of_ty_for_rt points at.
        , case
            when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 21 preceding)
            when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 22 preceding)
            when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 23 preceding)
            when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
            when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 13 preceding)
            when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 14 preceding)
            when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 15 preceding)
            when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 16 preceding)
            when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 17 preceding)
            when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 18 preceding)
            when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 19 preceding)
            when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 20 preceding)
        end as b_beg_of_ly_for_rt
        -- b_count from the row one position before the month-4 row that
        -- b_beg_of_ty_for_rt points at.
        , case
            when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
            when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
            when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
            when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
            when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
            when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
            when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
            when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
            when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
            when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
            when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
            when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
        end as b_end_of_ly_for_rt
        -- b_count from the most recent month-4 row (the current row itself
        -- when snapshot_month_int = 4).
        , case
            when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
            when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
            when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
            when snapshot_month_int = 4 then b_count
            when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
            when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
            when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
            when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
            when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
            when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
            when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
            when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
        end as b_beg_of_ty_for_rt
        -- "t_" variants: the same metrics, populated only when is_t = 'Y'
        -- and NULL otherwise.
        , case when is_t = 'Y' then b_count end as t_b_current_month
        , case when is_t = 'Y' then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding) end as t_b_1_month_prior
        , case when is_t = 'Y' then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding) end as t_b_1_year_prior
        , case
            when snapshot_month_int = 1 and is_t = 'Y' then b_count
            when snapshot_month_int = 2 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
            when snapshot_month_int = 3 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
            when snapshot_month_int = 4 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
            when snapshot_month_int = 5 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
            when snapshot_month_int = 6 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
            when snapshot_month_int = 7 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
            when snapshot_month_int = 8 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
            when snapshot_month_int = 9 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
            when snapshot_month_int = 10 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
            when snapshot_month_int = 11 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
            when snapshot_month_int = 12 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
        end as t_b_avg_ytd
        , snapshot_date
        , snapshot_date as snapshot_date_partition
    from schema.table
    -- NOTE(review): the column is cast to timestamp and compared with a date.
    -- If snapshot_date is already a date/timestamp the cast on the column is
    -- unnecessary -- confirm its type before simplifying.
    where cast(snapshot_date as timestamp) > cast('2019-01-01' as date)
) -- end of CTAS