我有一个用逗号分隔的年份字符串。
例如2000,2001,2002,2005,2006,2007 and 2010
.
我想对连续的数字进行分组。
我的输出应该是2000-2002,2005-2007 and 2010
. 在 Oracle 存储过程中有没有办法做到这一点?
我有一个用逗号分隔的年份字符串。
例如2000,2001,2002,2005,2006,2007 and 2010
.
我想对连续的数字进行分组。
我的输出应该是2000-2002,2005-2007 and 2010
. 在 Oracle 存储过程中有没有办法做到这一点?
免责声明 - 我不建议“按原样”使用此解决方案,但它可以提供想法,而且编写它很有趣
我假设您在表格中有一个包含 csv 字符串的列。
如果您使用的是 oracle 11gR2,那么您可以使用递归 CTE——
这是一个 sqlfiddle 演示
-- Collapses runs of consecutive years found in each comma-separated string
-- strings.v into "first-last" ranges, using a recursive CTE that consumes the
-- list one item per step.
with t as
(
-- Normalize the list: turn ' and ' into a comma, then strip remaining spaces.
select replace(replace(v, ' and ', ','), ' ','') v
from strings
),
-- text  : the part of the list still to be consumed
-- token : the leading item taken in this step
-- res   : the result built so far; it ends with '-' while the current range
--         holds a single year (the trailing '-' is stripped at the very end)
rcte(text, token, res) as
(
-- Anchor: '^\d*[^,]' takes the leading digits plus one non-comma character,
-- which for a purely numeric leading item is the item itself.
select v, regexp_substr(v, '^\d*[^,]'), regexp_substr(v, '^\d*[^,]') ||'-'
from t
union all
-- Recursive step: drop the leading "digits," from the parent's text, while
-- token is the leading item of the parent's text (so token trails the new
-- text by one item).
select regexp_replace(text, '^\d*,', ''),
regexp_substr(text, '^\d*[^,]'),
-- Same item as the parent's token: nothing new, keep res as is.
case when regexp_substr(text, '^\d*[^,]') = token then
res
-- Item is the parent's token + 1 (token is implicitly converted to a number):
-- replace the trailing '-' or '-<year>' of res with '-<this year>'.
when regexp_substr(text, '^\d*[^,]') = token+1 then
regexp_replace(res, '-\d*$', '-'||(token+1))
-- Otherwise a gap: close the current range and open a new one with this item.
else rtrim(res, '-') || ',' || regexp_substr(text, '^\d*[^,]') || '-'
end
from rcte
-- Stop recursing from a row whose remaining text is just its own token.
where text <> token
)
-- Keep only the row whose remaining text equals the last number in its
-- result, and strip a dangling '-' left by a single-year final range.
select rtrim(res, '-') from rcte
where text = regexp_substr(rtrim(res, '-'), '\d*$');
(这也可以在没有正则表达式的情况下完成)
可以通过使用分析函数使用 SQL 来完成。
*更新 1:* 答案已更新,补充了之前版本中遗漏的字符串解析功能。
*更新 2:* 添加了最终的字符串拼接。
-- Collapse a list of years into ranges of consecutive years, e.g.
--   '2000,2001,2002,2005,2006,2007 and 2010' -> '2000-2002,2005-2007 and 2010'
-- The last range is joined with ' and ', all earlier ones with ','.
with p as ( -- Parameter string, with the final ' and ' normalized to a comma
  select replace('2000,2001,2002,2005,2006,2007 and 2010',' and ',',') s from dual
),
ex as ( -- Parse string to sequence: one row per comma-separated item
  select
    to_number(
      substr(
        s,
        -- item start: position 1, or one past the previous comma
        decode( level, 1, 1, instr(s,',',1,level-1)+1 ),
        -- item length: (next comma, or end of string + 1) minus item start
        decode( instr(s,',',1,level), 0, length(s)+1, instr(s,',',1,level) )
        -
        decode( level, 1, 1, instr(s,',',1,level-1)+1 )
      )
    ) as y
  from p
  -- generate level N only while an (N-1)-th comma exists
  connect by instr(s,',',1,level-1) > 0
),
neighbours as ( -- Previous/next value in the sorted sequence
  select
    y,
    lag(y) over (order by y) prev_y,
    lead(y) over (order by y) next_y
  from ex
),
gaps as ( -- Distance to each neighbour; 100 stands in for "no neighbour"
  select
    y,
    nvl(y - prev_y, 100) is_start,
    nvl(next_y - y, 100) is_end
  from neighbours
),
boundaries as ( -- Keep only interval starts/ends; for a start, the next kept row is its end
  select
    y,
    is_start,
    is_end,
    lead(y) over (order by y) end_y
  from gaps
  where is_start > 1 or is_end > 1
),
period_set as ( -- One row per interval start, with the final string for that interval
  select
    y,
    lag(y) over (order by y) prior_y,
    max(y) over (partition by 1) max_y,
    -- a start that is also an end is a single year, otherwise "start-end"
    y || (case when is_end > 1 then null else '-' || end_y end) as interval_string
  from boundaries
  where is_start > 1
)
select
  replace(
    substr(
      sys_connect_by_path(
        -- 'm' marks the last interval so that ',m' can be turned into ' and '
        -- below. It is not added when the last interval is also the first
        -- one: with no preceding comma the marker would never be replaced
        -- and would leak into the result (e.g. 'm2000-2002').
        case when y = max_y and prior_y is not null then 'm' end || interval_string,
        ','
      ),2
    ),
    ',m',
    ' and '
  ) result_str
from
  period_set
where
  connect_by_isleaf = 1
start with
  prior_y is null
connect by
  prior y = prior_y
SQL Fiddle 可以在这里找到。
假设您的数据将以逗号分隔并输入,您可以提及以逗号分隔的任何年份。
-- Prints one "Year span : ..." line per run of consecutive years found in the
-- comma-separated list supplied through the substitution variable &n.
-- A run holding a single year is printed as that year; a longer run is
-- printed as first-last. Empty items (e.g. a trailing comma) are ignored and
-- an empty list prints nothing. A non-numeric item makes To_Number fail.
DECLARE
  v_str        VARCHAR2(32767) := '&n';
  v_pos        PLS_INTEGER := 1;  -- offset of the first character of the current item
  v_next_comma PLS_INTEGER;       -- offset of the comma ending the current item, 0 if none
  v_year       NUMBER;            -- current item as a number
  v_first      NUMBER;            -- first year of the run being built
  v_last       NUMBER;            -- most recent year of the run being built

  -- Emits one finished run in the "Year span : ..." format.
  PROCEDURE print_span(p_first IN NUMBER, p_last IN NUMBER) IS
  BEGIN
    IF p_first = p_last THEN
      Dbms_Output.put_line('Year span : '||To_Char(p_first));
    ELSE
      Dbms_Output.put_line('Year span : '||To_Char(p_first)||'-'||To_Char(p_last));
    END IF;
  END print_span;
BEGIN
  -- InStr on a NULL string returns NULL, which would keep the loop below from
  -- ever reaching its exit condition, so an empty list is skipped entirely.
  IF v_str IS NOT NULL THEN
    LOOP
      v_next_comma := InStr(v_str, ',', v_pos);
      IF v_next_comma = 0 THEN
        v_year := To_Number(Trim(SubStr(v_str, v_pos)));
      ELSE
        v_year := To_Number(Trim(SubStr(v_str, v_pos, v_next_comma - v_pos)));
      END IF;

      IF v_year IS NOT NULL THEN
        IF v_first IS NULL THEN
          -- very first year opens the first run
          v_first := v_year;
        ELSIF v_year <> v_last + 1 THEN
          -- gap: the run so far is complete, start a new one
          print_span(v_first, v_last);
          v_first := v_year;
        END IF;
        v_last := v_year;
      END IF;

      EXIT WHEN v_next_comma = 0;
      v_pos := v_next_comma + 1;
    END LOOP;

    -- the run still open after the last item
    IF v_first IS NOT NULL THEN
      print_span(v_first, v_last);
    END IF;
  END IF;
END;
使用数学
-- Buckets rows by seq rounded up to a multiple of 3 and reports, per bucket,
-- the lowest seq, the highest seq and the number of rows.
SELECT
    MIN(seq) AS range_from,
    MAX(seq) AS range_to,
    COUNT(*) AS cnt
FROM sometable
GROUP BY CEIL(seq / 3) * 3
这部分 ceil(seq/3) * 3
是将数字向上取整到最接近的三的倍数。如果您想要 5 的范围,请使用ceil(seq/5) * 5
. 干杯!
一个很好的问题! 请检查我的逻辑...
-- Splits a comma-separated list of years and returns one row per run of
-- consecutive years: a lone year as 'YYYY', a longer run as 'first-last'.
-- For '2000,2002,2003,2004,2006,2007' the rows are 2000, 2002-2004, 2006-2007.
with test as
(
  select '2000,2002,2003,2004,2006,2007' str from dual
)
,years as (
  -- One row per comma-separated item. Empty items come back as NULL and are
  -- dropped; duplicates are removed so they cannot split a run.
  select distinct split1
  from
  (
    select to_number(regexp_substr(str, '[^,]+', 1, level)) split1
    from test
    connect by level <= regexp_count(str, ',') + 1
  )
  where split1 is not null
)
,runs as (
  -- Within a run of consecutive values, value minus its rank in sorted order
  -- is constant, so that difference identifies the run regardless of the
  -- order in which the years were listed.
  select
    split1,
    split1 - row_number() over (order by split1) run_id
  from years
)
select
  case when min(split1) = max(split1) then to_char(min(split1))
       else to_char(min(split1)) ||'-'|| to_char(max(split1))
  end op1
from runs
group by run_id
order by min(split1)
18:42:15 SYSTEM@dwal> l
1 with p as (
2 select replace('2000,2001,2002,2005,2006,2007 and 2010',' and ',',') s, '[0-9]{4}' r from dual
3 ), ex as (
4 select regexp_substr(s,r, 1, level) as y
5 from p
6 connect by level <= regexp_count(s, r)
7 ), grp as (
8 select connect_by_root(y) s, y
9 from ( select e1.y y, e2.y p from ex e1, ex e2 where e1.y - 1 = e2.y(+) )
10 connect by prior y = p
11 start with p is null
12 ), agg as (
13 select listagg(s||decode(max(y), s, null, '-'||max(y)), ',') within group (order by s) str
14 from grp group by s
15 )
16* select regexp_replace(str, ',', ' and ', 1, regexp_count(str, ',')) result from agg
18:42:16 SYSTEM@dwal> /
RESULT
------------------------------
2000-2002,2005-2007 and 2010
Elapsed: 00:00:00.02
您需要使用游标(cursor)来遍历这些年份,并按类似下面的逻辑进行处理:
-- Returns the years stored in dates.year as a single string in which every
-- run of consecutive years is collapsed to 'first-last' and runs are
-- separated by ' , ', e.g. '2000-2002 , 2005-2007 , 2010'.
-- Returns NULL when the table holds no non-NULL year.
CREATE OR REPLACE FUNCTION concatYears
RETURN VARCHAR2
IS
    run_start  NUMBER;            -- first year of the run being built
    run_end    NUMBER;            -- most recent year of the run being built
    result_str VARCHAR2(32767);   -- output accumulated so far

    CURSOR cur1 IS
        SELECT year
        FROM dates
        WHERE year IS NOT NULL
        ORDER BY year ASC;

    -- Appends the finished run [run_start, run_end] to result_str.
    PROCEDURE append_run IS
    BEGIN
        IF result_str IS NOT NULL THEN
            result_str := result_str || ' , ';
        END IF;
        result_str := result_str || TO_CHAR(run_start);
        IF run_end > run_start THEN
            result_str := result_str || '-' || TO_CHAR(run_end);
        END IF;
    END append_run;
BEGIN
    FOR rec IN cur1 LOOP
        IF run_start IS NULL THEN
            -- very first year opens the first run
            run_start := rec.year;
        ELSIF rec.year > run_end + 1 THEN
            -- gap: the current run is complete, start a new one
            append_run;
            run_start := rec.year;
        END IF;
        -- rows arrive in ascending order, so a repeated or next year
        -- simply extends (or leaves unchanged) the current run
        run_end := rec.year;
    END LOOP;

    -- the run still open after the last row
    IF run_start IS NOT NULL THEN
        append_run;
    END IF;

    RETURN result_str;
END;