在 PostgreSQL 9.4.4 中,我们有一个非常大的带有 if 和 elsif 语句的 plpgsql 函数在每个 if-body 中都有对 stable-sql 函数的函数调用。
我们通过以下方式调用该函数:
SELECT *
from rawdata.getNumbersForUserBasedMetricEventsGroupedByClient('2015-09-28','2015-10-28','{4}'::int[],2,null,null,null,null,null);
前 4-5 次函数在大约2.5 秒内执行得相当快,但随后突然性能迅速下降,执行大约需要7.5 秒。对于所有连续呼叫,它都保持在该水平。我们还尝试将 plpgsql 函数声明为稳定的,但这没有帮助。
当我们直接调用内部 stable-sql 函数之一时,执行总是需要大约 2.5 秒。
这是 rawdata.metricevent 表的 Schema:
rawdata.metricevent (metriceventid bigint PRIMARY KEY,
metricevent integer,
client integer,
age integer,
country varchar(256),
userideventowner bigint,
contributoruserid bigint,
tournamentid bigint,
eventoccurtime timestamp,
iscounted boolean)
我们在 eventoccurtime 列上有一个 btree 索引。如果没有 btree 索引,差异会更大,执行有时只需几秒钟即可完成,但有时会持续超过 100 秒。
现在我们的问题是:为什么会这样?这是怎么回事,当plpgsql函数执行第5次或第6次时,为什么突然花了这么长时间?顺便说一句,此查询的 CPU 负载也非常高。我们还使用 EXPLAIN ANALYZE 对查询进行了分析,查询规划器 ALWAYS 大约需要 0.034 毫秒,但查询执行时间从 2.5 秒到 7.5 秒不等。而且它也永远不会介于两者之间,要么是 2.5 秒,要么是 7.5 秒。
这些是具有可变执行时间的 Main-pgpsql 函数和下面具有恒定执行时间的 stable-sql 函数。
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUserBasedMetricEventsGroupedByClient(pFrom timestamp, pTo timestamp, pMetricEvent integer[], pTimeDomainType integer,
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
BEGIN
IF pTimeDomainType = 1 THEN
--hours
RETURN QUERY
SELECT * FROM rawdata.getNumbersForUBMetricEventsGroupedByClientPerHours(pFrom,pTo,pMetricEvent,pCountry,pAgeFrom,pAgeTo,pUserLanguage,pTournamentLanguage);
ELSIF pTimeDomainType = 2 THEN
--days
RETURN QUERY
SELECT * FROM rawdata.getNumbersForUBMetricEventsGroupedByClientPerDays(pFrom,pTo,pMetricEvent,pCountry,pAgeFrom,pAgeTo,pUserLanguage,pTournamentLanguage);
ELSIF pTimeDomainType = 3 THEN
--week
RETURN QUERY
SELECT * FROM rawdata.getNumbersForUBMetricEventsGroupedByClientPerWeeks(pFrom,pTo,pMetricEvent,pCountry,pAgeFrom,pAgeTo,pUserLanguage,pTournamentLanguage);
ELSIF pTimeDomainType = 4 THEN
--month
RETURN QUERY
SELECT * FROM rawdata.getNumbersForUBMetricEventsGroupedByClientPerMonths(pFrom,pTo,pMetricEvent,pCountry,pAgeFrom,pAgeTo,pUserLanguage,pTournamentLanguage);
END IF;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUBMetricEventsGroupedByClientPerHours(pFrom timestamp, pTo timestamp, pMetricEvent integer[],
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
SELECT hours timedomain,count(distinct em.userideventowner) as x,count(distinct ef.userideventowner) as y,count(distinct emh.userideventowner) as xx,count(distinct efh.userideventowner) as yy
FROM generate_series
( pFrom::timestamp
, pTo::timestamp + '23 hour'
, '1 hour'::interval) hours
LEFT JOIN rawdata.metricevent e1 ON e1.eventoccurtime >=pFrom
AND e1.eventoccurtime < pTo + '1 day'
AND (e1.metricevent = ANY (pMetricEvent))
AND (e1.country = pCountry OR pCountry is null)
AND (e1.age >= pAgeFrom OR pAgeFrom is null) AND (e1.age <= pAgeTo OR pAgeTo is null)
AND userideventowner >= 110
AND hours = date_trunc('hour',e1.eventoccurtime)
LEFT JOIN rawdata.userlanguage ul ON e1.userideventowner = ul.userideventowner
AND (ul.userlanguage = pUserLanguage OR pUserLanguage is null)
LEFT JOIN rawdata.metricevent ei ON e1.metriceventid = em.metriceventid AND ei.client=1
LEFT JOIN rawdata.metricevent ea ON e1.metriceventid = ef.metriceventid AND ea.client=2
LEFT JOIN rawdata.metricevent ew ON e1.metriceventid = emh.metriceventid AND ew.client=3
LEFT JOIN rawdata.metricevent eww ON e1.metriceventid = efh.metriceventid AND eww.client=4
GROUP BY hours
ORDER BY hours;
$$
LANGUAGE sql STABLE;
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUBMetricEventsGroupedByClientPerDays(pFrom timestamp, pTo timestamp, pMetricEvent integer[],
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
SELECT days timedomain,count(distinct em.userideventowner) as x,count(distinct ef.userideventowner) as y,count(distinct emh.userideventowner) as xx,count(distinct efh.userideventowner) as yy
FROM generate_series
( pFrom::timestamp
, pTo::timestamp
, '1 day'::interval) days
LEFT JOIN rawdata.metricevent e1 ON e1.eventoccurtime >=pFrom
AND e1.eventoccurtime < pTo + '1 day'
AND (e1.metricevent = ANY (pMetricEvent))
AND (e1.country = pCountry OR pCountry is null)
AND (e1.age >= pAgeFrom OR pAgeFrom is null) AND (e1.age <= pAgeTo OR pAgeTo is null)
AND userideventowner >= 110
AND days = date_trunc('day',e1.eventoccurtime)
LEFT JOIN rawdata.userlanguage ul ON e1.userideventowner = ul.userideventowner
AND (ul.userlanguage = pUserLanguage OR pUserLanguage is null)
LEFT JOIN rawdata.metricevent ei ON e1.metriceventid = em.metriceventid AND ei.client=1
LEFT JOIN rawdata.metricevent ea ON e1.metriceventid = ef.metriceventid AND ea.client=2
LEFT JOIN rawdata.metricevent ew ON e1.metriceventid = emh.metriceventid AND ew.client=3
LEFT JOIN rawdata.metricevent eww ON e1.metriceventid = efh.metriceventid AND eww.client=4
GROUP BY days
ORDER BY days;
$$
LANGUAGE sql STABLE;
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUBMetricEventsGroupedByClientPerWeeks(pFrom timestamp, pTo timestamp, pMetricEvent integer[],
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
SELECT min(days) timedomain,count(distinct em.userideventowner) as x,count(distinct ef.userideventowner) as y,count(distinct emh.userideventowner) as xx,count(distinct efh.userideventowner) as yy
FROM generate_series
( pFrom::timestamp
, pTo::timestamp
, '1 day'::interval) days
LEFT JOIN rawdata.metricevent e1 ON e1.eventoccurtime >=pFrom
AND e1.eventoccurtime < pTo + '1 day'
AND (e1.metricevent = ANY (pMetricEvent))
AND (e1.country = pCountry OR pCountry is null)
AND (e1.age >= pAgeFrom OR pAgeFrom is null) AND (e1.age <= pAgeTo OR pAgeTo is null)
AND userideventowner >= 110
AND days = date_trunc('day',e1.eventoccurtime)
LEFT JOIN rawdata.userlanguage ul ON e1.userideventowner = ul.userideventowner
AND (ul.userlanguage = pUserLanguage OR pUserLanguage is null)
LEFT JOIN rawdata.metricevent ei ON e1.metriceventid = em.metriceventid AND ei.client=1
LEFT JOIN rawdata.metricevent ea ON e1.metriceventid = ef.metriceventid AND ea.client=2
LEFT JOIN rawdata.metricevent ew ON e1.metriceventid = emh.metriceventid AND ew.client=3
LEFT JOIN rawdata.metricevent eww ON e1.metriceventid = efh.metriceventid AND eww.client=4
GROUP BY EXTRACT(WEEK FROM days)
ORDER BY 1;
$$
LANGUAGE sql STABLE;
CREATE OR REPLACE FUNCTION rawdata.getNumbersForUBMetricEventsGroupedByClientPerMonths(pFrom timestamp, pTo timestamp, pMetricEvent integer[],
pCountry varchar(100),pAgeFrom integer,pAgeTo integer,pUserlanguage varchar(50),pTournamentlanguage varchar(50))
RETURNS TABLE(dfrom timestamp, x bigint, y bigint, xx bigint, yy bigint)
AS $$
SELECT min(days) timedomain,count(distinct em.userideventowner) as x,count(distinct ef.userideventowner) as y,count(distinct emh.userideventowner) as xx,count(distinct efh.userideventowner) as yy
FROM generate_series
( pFrom::timestamp
, pTo::timestamp
, '1 day'::interval) days
LEFT JOIN rawdata.metricevent e1 ON e1.eventoccurtime >=pFrom
AND e1.eventoccurtime < pTo + '1 day'
AND (e1.metricevent = ANY (pMetricEvent))
AND (e1.country = pCountry OR pCountry is null)
AND (e1.age >= pAgeFrom OR pAgeFrom is null) AND (e1.age <= pAgeTo OR pAgeTo is null)
AND userideventowner >= 110
AND days = date_trunc('day',e1.eventoccurtime)
LEFT JOIN rawdata.userlanguage ul ON e1.userideventowner = ul.userideventowner
AND (ul.userlanguage = pUserLanguage OR pUserLanguage is null)LEFT JOIN rawdata.metricevent ei ON e1.metriceventid = em.metriceventid AND ei.client=1
LEFT JOIN rawdata.metricevent ea ON e1.metriceventid = ef.metriceventid AND ea.client=2
LEFT JOIN rawdata.metricevent ew ON e1.metriceventid = emh.metriceventid AND ew.client=3
LEFT JOIN rawdata.metricevent eww ON e1.metriceventid = efh.metriceventid AND eww.client=4
GROUP BY EXTRACT(MONTH FROM days)
ORDER BY 1;
$$
LANGUAGE sql STABLE;
亲切的问候,托马斯