1

I have the following query:

-- Hourly count of instances per version for one group over the last 24 hours.
-- For every hourly timestamp, each recently active instance is attributed to
-- the version of its newest record dated at or before that timestamp.
--
-- Parameter: $1 = group_id to report on.
-- Result:    ts, "case" (version, '' when unknown), total (instances counted).
WITH time_series AS (
    -- One row per hour, from 24 hours ago up to now.
    SELECT ts
    FROM generate_series(now() - INTERVAL '1 day', now(), INTERVAL '1 hour') AS ts
),
recent_instances AS (
    -- Instances of the group that checked for updates within the last day,
    -- stamped with their last granted update time, falling back to creation time.
    -- Deliberately unsorted: every consumer either ignores row order or re-sorts.
    SELECT
        instance_id,
        COALESCE(last_update_granted_ts, created_ts) AS created_ts,
        version
    FROM instance_application
    WHERE group_id = $1
      AND last_check_for_updates >= now() - INTERVAL '1 day'
),
instance_versions AS (
    -- Status-4 history rows of those instances plus each instance's current row.
    -- UNION ALL instead of UNION: the DISTINCT ON below already keeps a single
    -- row per instance, so de-duplicating here would only add a sort/hash pass.
    SELECT
        instance_id,
        created_ts,
        version
    FROM instance_status_history
    WHERE instance_id IN (SELECT instance_id FROM recent_instances)
      AND status = 4
    UNION ALL
    SELECT
        instance_id,
        created_ts,
        version
    FROM recent_instances
)
SELECT
    time_series.ts,
    -- Aliased "case" because that is the name PostgreSQL assigned to the
    -- original unnamed CASE expression; kept so by-name consumers still work.
    COALESCE(latest.version, '') AS "case",
    -- count() skips NULLs, so hours without any instance report 0.
    count(latest.version) AS total
FROM time_series
LEFT JOIN LATERAL (
    -- Newest record per instance at or before the current hourly timestamp.
    -- NOTE(review): rows tying on created_ts are resolved arbitrarily, as before.
    SELECT DISTINCT ON (iv.instance_id)
        iv.instance_id,
        iv.version,
        iv.created_ts
    FROM instance_versions AS iv
    WHERE iv.created_ts <= time_series.ts
    ORDER BY iv.instance_id, iv.created_ts DESC
) AS latest ON true
GROUP BY time_series.ts, COALESCE(latest.version, '')
ORDER BY time_series.ts DESC;

So the instance_versions subquery is executed once for every timestamp generated by the time_series query (see the last SELECT statement). But for some reason the lateral join is very slow. The subquery of the lateral join returns around 12k-15k rows (for a single timestamp from the time_series query), which is not a big number, and the final number of rows returned after the lateral join ranges from 250k to 350k. Is there a way I can optimize this?

4

0 回答 0