0

我有一个查询,想在 EMR 上的 PrestoDB 中运行。它读取的是存放在 S3 上、由 AWS Glue 数据目录管理的 ORC 表。我收到了“查询失败:语句太大(分析期间堆栈溢出)”错误,但查询本身并不算长,只有 1364 行。它在 Athena 中可以正常运行,只有在我改为直接在 PrestoDB 上运行时才会失败。

完整查询共 1364 行,大小约为 144 KB(下面的示例更短,因为我不得不对一些对象做匿名化处理)。

我是在 DataGrip IDE 中通过 SSH 代理跳转(ProxyJump)连接来运行它的。我想知道这是否可能是某种 HTTP 层面的问题?

下面是一个查询示例;由于 StackOverflow 有 30000 个字符的限制,这里做了删减,实际查询更长。

-- Creates new_schema.test (ORC) from schema.table, keeping the distinct rows
-- whose snapshot_date is after 2019-01-01. Each output row carries:
--   * the grain columns and snapshot month/year,
--   * string keys for the coarser roll-up levels (ol2 .. f_all_up),
--   * b_count metrics looked up over earlier rows of the same
--     grain_window_partition (prior month, prior year, year-to-date averages,
--     and "_for_rt" variants whose year restarts at snapshot_month_int = 4).
--
-- Every window is partitioned by grain_window_partition and ordered by
-- snapshot_date only. Also ordering by the partition key is a no-op because it
-- is constant inside each partition, so it has been dropped from the window
-- specifications to keep the statement smaller.
--
-- NOTE(review): all frames are ROWS-based, i.e. "12 preceding" means twelve
-- rows back, not twelve calendar months. This presumably relies on exactly one
-- row per grain_window_partition per month with no gaps -- confirm.
create
table new_schema.test
with (format = 'ORC')
as (
        select distinct
            sr2_ln
            , sr3_ln
            , sr4_ln
            , sr5_ln
            , sr2_lin
            , sr3_lin
            , sr4_lin
            , sr5_lin
            , is_t1_t3
            , is_t4_t7
            , is_t8
            , is_t10
            , egroup
            , is_f
            , is_v
            , is_d
            , is_t
            , is_b
            , ry
            , snapshot_month_int
            , snapshot_year

            --Window partitions
            -- Each key concatenates the columns of one roll-up level; NULLs
            -- become '_' so a missing value does not null out the whole key.
            -- NOTE(review): the parts are joined without a delimiter, so two
            -- different column combinations could produce the same key unless
            -- the values are fixed-width -- confirm.
            , (
                coalesce(sr2_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as ol2_ry_window_partition
            , (
                coalesce(sr2_lin,'_')
                || coalesce(sr3_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as ol3_ry_window_partition
            , (
                coalesce(sr2_lin,'_')
                || coalesce(sr3_lin,'_')
                || coalesce(sr4_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as ol4_ry_window_partition
            , (
                coalesce(sr2_lin,'_')
                || coalesce(sr3_lin,'_')
                || coalesce(sr4_lin,'_')
                || coalesce(sr5_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(is_f,'_')
                || coalesce(is_v,'_')
                || coalesce(is_d,'_')
                || coalesce(is_t, '_')
                || coalesce(is_b, '_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as e_all_up_window_partition
            , (
                coalesce(sr2_lin,'_')
                || coalesce(sr3_lin,'_')
                || coalesce(sr4_lin,'_')
                || coalesce(sr5_lin,'_')
                || coalesce(is_t1_t3,'_')
                || coalesce(is_t4_t7,'_')
                || coalesce(is_t8,'_')
                || coalesce(is_t10,'_')
                || coalesce(egroup,'_')
                || coalesce(is_v,'_')
                || coalesce(is_d,'_')
                || coalesce(is_t, '_')
                || coalesce(is_b, '_')
                || coalesce(ry,'_')
                || cast(snapshot_date as varchar)
                ) as f_all_up_window_partition

            --b metrics
            , b_count as b_current_month
            -- A frame of "k preceding and k preceding" selects the single row k rows back.
            , sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding) as b_1_month_prior_original
            , sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding) as b_1_year_prior
            -- Average from the row (snapshot_month_int - 1) rows back through the current row.
            , case
                when snapshot_month_int = 1 then b_count
                when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
                when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
                when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
                when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
                when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
                when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
                when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
                when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
                when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
                when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
                when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
                end as b_avg_ytd
            -- Same span as b_avg_ytd, shifted back 12 rows (frame always ends 12 rows back).
            , case
                when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 12 preceding)
                when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 12 preceding)
                when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 12 preceding)
                when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 12 preceding)
                when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 12 preceding)
                when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 12 preceding)
                when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 12 preceding)
                when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 12 preceding)
                when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 12 preceding)
                when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 12 preceding)
                when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 12 preceding)
                end as b_avg_ly_ytd
            , avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row) as b_avg_ttm_as_of_snapshot_date
            -- Value from the row snapshot_month_int rows back.
            , case
                when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
                when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
                when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
                when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
                when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
                when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
                when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
                when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
                when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
                when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
                when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
                when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                end as b_end_of_ly
            -- "_for_rt" variants: the running span restarts at snapshot_month_int = 4
            -- (month 4 uses the current row only; months 1-3 reach back 9-11 rows).
            , case
                when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
                when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
                when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
                when snapshot_month_int = 4 then b_count
                when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
                when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
                when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
                when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
                when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
                when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
                when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
                when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
                end as b_avg_ytd_for_rt
            -- Same span as b_avg_ytd_for_rt, shifted back 12 rows.
            , case
                when snapshot_month_int = 1 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 12 preceding)
                when snapshot_month_int = 2 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 12 preceding)
                when snapshot_month_int = 3 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 12 preceding)
                when snapshot_month_int = 4 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                when snapshot_month_int = 5 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 12 preceding)
                when snapshot_month_int = 6 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 12 preceding)
                when snapshot_month_int = 7 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 12 preceding)
                when snapshot_month_int = 8 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 12 preceding)
                when snapshot_month_int = 9 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 12 preceding)
                when snapshot_month_int = 10 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 12 preceding)
                when snapshot_month_int = 11 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 12 preceding)
                when snapshot_month_int = 12 then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 12 preceding)
                end as b_avg_ly_ytd_for_rt
            -- Single row at the start of the span used by b_avg_ly_ytd_for_rt.
            , case
                when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 21 preceding and 21 preceding)
                when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 22 preceding and 22 preceding)
                when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 23 preceding and 23 preceding)
                when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 13 preceding and 13 preceding)
                when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 14 preceding and 14 preceding)
                when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 15 preceding and 15 preceding)
                when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 16 preceding and 16 preceding)
                when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 17 preceding and 17 preceding)
                when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 18 preceding and 18 preceding)
                when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 19 preceding and 19 preceding)
                when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 20 preceding and 20 preceding)
                end as b_beg_of_ly_for_rt
            -- Single row one position before the start of the span used by b_avg_ytd_for_rt.
            , case
                when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
                when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
                when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding)
                when snapshot_month_int = 4 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
                when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
                when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
                when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
                when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
                when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
                when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
                when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
                when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
                end as b_end_of_ly_for_rt
            -- Single row at the start of the span used by b_avg_ytd_for_rt.
            , case
                when snapshot_month_int = 1 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and 9 preceding)
                when snapshot_month_int = 2 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and 10 preceding)
                when snapshot_month_int = 3 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and 11 preceding)
                when snapshot_month_int = 4 then b_count
                when snapshot_month_int = 5 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding)
                when snapshot_month_int = 6 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and 2 preceding)
                when snapshot_month_int = 7 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and 3 preceding)
                when snapshot_month_int = 8 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and 4 preceding)
                when snapshot_month_int = 9 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and 5 preceding)
                when snapshot_month_int = 10 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and 6 preceding)
                when snapshot_month_int = 11 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and 7 preceding)
                when snapshot_month_int = 12 then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and 8 preceding)
                end as b_beg_of_ty_for_rt

            -- t_ metrics: same lookups as above, emitted only when is_t = 'Y' (NULL otherwise).
            , case when is_t = 'Y' then b_count end as t_b_current_month
            , case when is_t = 'Y' then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and 1 preceding) end as t_b_1_month_prior
            , case when is_t = 'Y' then sum(b_count) over (partition by grain_window_partition order by snapshot_date rows between 12 preceding and 12 preceding) end as t_b_1_year_prior
            , case
                when snapshot_month_int = 1 and is_t = 'Y' then b_count
                when snapshot_month_int = 2 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 1 preceding and current row)
                when snapshot_month_int = 3 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 2 preceding and current row)
                when snapshot_month_int = 4 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 3 preceding and current row)
                when snapshot_month_int = 5 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 4 preceding and current row)
                when snapshot_month_int = 6 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 5 preceding and current row)
                when snapshot_month_int = 7 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 6 preceding and current row)
                when snapshot_month_int = 8 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 7 preceding and current row)
                when snapshot_month_int = 9 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 8 preceding and current row)
                when snapshot_month_int = 10 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 9 preceding and current row)
                when snapshot_month_int = 11 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 10 preceding and current row)
                when snapshot_month_int = 12 and is_t = 'Y' then avg(b_count) over (partition by grain_window_partition order by snapshot_date rows between 11 preceding and current row)
                end as t_b_avg_ytd
            
            , snapshot_date
            , snapshot_date as snapshot_date_partition

        from schema.table 
        -- NOTE(review): compares a timestamp with a date (implicit coercion) and
        -- wraps the column in a cast; snapshot_date's stored type is not visible
        -- here, so the predicate is left unchanged -- confirm before simplifying.
        where cast(snapshot_date as timestamp) > cast('2019-01-01' as date)

) -- end of CTAS

4

0 回答 0