如果从先前插入的行中随机指定父级,就无法控制树的高度(层级数)以及各层级的填充方式,这在某些情况下可能并不理想。
逐级使用数据填充树可能更方便。
下面的辅助表值函数使用 Itzik 的交叉连接 CTE 方法生成数字序列(相关介绍可参见此处):
-- Inline table-valued function that returns the integers 1..@cnt in a single
-- column N, built from stacked cross joins (no table access, no recursion).
--
-- Parameters:
--   @cnt  number of rows wanted. Values <= 0 (or NULL) produce an empty result,
--         because no generated N satisfies N <= @cnt.
-- Returns:
--   one row per integer N with 1 <= N <= @cnt; at most 2^32 rows can be produced.
create function ftItziksCJCTE
(
    @cnt int
)
returns table as
return
(
    WITH
    -- Each level cross-joins the previous level with itself, squaring the row count.
    E00(N) AS (SELECT 1 UNION ALL SELECT 1),              -- 2 rows
    E02(N) AS (SELECT 1 FROM E00 a CROSS JOIN E00 b),     -- 4 rows
    E04(N) AS (SELECT 1 FROM E02 a CROSS JOIN E02 b),     -- 16 rows
    E08(N) AS (SELECT 1 FROM E04 a CROSS JOIN E04 b),     -- 256 rows
    E16(N) AS (SELECT 1 FROM E08 a CROSS JOIN E08 b),     -- 65,536 rows
    E32(N) AS (SELECT 1 FROM E16 a CROSS JOIN E16 b),     -- 4,294,967,296 rows
    -- All source rows are identical, so no real ordering is needed to number them;
    -- ORDER BY (SELECT NULL) states that explicitly.
    E(N) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E32)
    select N from E where N <= @cnt
)
控制树中元素分布的简单表格:
-- Per-level configuration for the tree generator: one row per tree level.
create table #TreeLevels
(
    -- 1-based level number, assigned automatically in insertion order
    LevelNo     int identity(1, 1) not NULL primary key clustered,
    -- lower and upper bound for the random number of elements generated
    -- on this level (per parent element; for level 1, for the level as a whole)
    MinElements int not NULL,
    MaxElements int not NULL
)
样本分布:
-- Sample distribution with three levels. LevelNo is an identity column, so the
-- levels are numbered 1..3 in the order of these statements. The explicit column
-- list keeps the inserts valid if #TreeLevels has additional nullable columns.
insert into #TreeLevels (MinElements, MaxElements) values (7, 10);      -- level 1
insert into #TreeLevels (MinElements, MaxElements) values (70, 100);    -- level 2
insert into #TreeLevels (MinElements, MaxElements) values (700, 1000);  -- level 3
这会生成 7 到 10 个 ParentID = NULL 的根元素,每个根元素有 70 到 100 个子元素,每个子元素又有 700 到 1000 个子元素。最后一层的元素数量为 343000 到 1000000(即 7×70×700 到 10×100×1000),元素总数还要再加上前两层的元素。
或其他分布:
-- Alternative sample distribution with seven levels: a single root, then levels
-- alternating between 9-15 and 10-12 elements per parent. LevelNo is an identity
-- column, so the levels are numbered 1..7 in the order of these statements.
-- The explicit column list keeps the inserts valid if #TreeLevels has additional
-- nullable columns.
insert into #TreeLevels (MinElements, MaxElements) values (1, 1);    -- level 1
insert into #TreeLevels (MinElements, MaxElements) values (9, 15);   -- level 2
insert into #TreeLevels (MinElements, MaxElements) values (10, 12);  -- level 3
insert into #TreeLevels (MinElements, MaxElements) values (9, 15);   -- level 4
insert into #TreeLevels (MinElements, MaxElements) values (10, 12);  -- level 5
insert into #TreeLevels (MinElements, MaxElements) values (9, 15);   -- level 6
insert into #TreeLevels (MinElements, MaxElements) values (10, 12);  -- level 7
这意味着将有一个根元素,其中包含 9 到 15 个子元素,每个子元素都有 10 到 12 个元素等。
然后可以逐级填充树:
-- Fills TestTree level by level, driven by the rows of #TreeLevels.
--
-- Level 1 rows are inserted with ParentID = NULL. For every later level, each
-- element created on the previous level receives a random number of children
-- between MinElements and MaxElements of that level.
--
-- The IDs created on the previous level are kept in one of two work tables
-- (#Inserted / #Inserted2). The tables swap roles on every level: the non-empty
-- one is read as the parent list, the other receives the new IDs through the
-- OUTPUT clause, and the parent list is then truncated. This avoids copying IDs.
--
-- The child count is computed as floor(rand * (max - min + 1)) + min so that every
-- value in [min, max] is equally likely (rounding rand * (max - min) + min would
-- give the two endpoints only half the probability of the interior values).
-- Assumes MinElements <= MaxElements.
-- NOTE(review): the random expression is passed to an inline function; how often
-- the engine evaluates it (per parent or otherwise) is up to the optimizer — verify
-- the resulting distribution on the target server.
declare @levelNo int, @eMin int, @eMax int

-- Remove work tables left over from an earlier run in this session so the
-- script can be executed again.
if object_id('tempdb..#Inserted') is not null
    drop table #Inserted
if object_id('tempdb..#Inserted2') is not null
    drop table #Inserted2

create table #Inserted (ID int not NULL, primary key nonclustered (ID))
create table #Inserted2 (ID int not NULL, primary key nonclustered (ID))

set @levelNo = 1
while 1=1
begin
    -- Load the settings of the current level; stop when no such level is defined.
    select @eMin = MinElements, @eMax = MaxElements from #TreeLevels where LevelNo = @levelNo
    if @@ROWCOUNT = 0
        break

    if @levelNo = 1
    begin
        -- Root level: no parents, elements are created with ParentID = NULL.
        insert into TestTree (ParentID)
        output inserted.ID into #Inserted (ID)
        select NULL
        from ftItziksCJCTE(cast(floor(rand(checksum(newid())) * (@eMax - @eMin + 1)) as int) + @eMin)
    end
    else
    begin
        if exists (select 1 from #Inserted)
        begin
            -- Parents are in #Inserted; the new IDs go to #Inserted2.
            insert into TestTree (ParentID)
            output inserted.ID into #Inserted2 (ID)
            select
                I.ID
            from
                #Inserted I
                cross apply ftItziksCJCTE(cast(floor(rand(checksum(newid())) * (@eMax - @eMin + 1)) as int) + @eMin) F

            truncate table #Inserted
        end
        else
        begin
            -- Parents are in #Inserted2; the new IDs go to #Inserted.
            insert into TestTree (ParentID)
            output inserted.ID into #Inserted (ID)
            select
                I.ID
            from
                #Inserted2 I
                cross apply ftItziksCJCTE(cast(floor(rand(checksum(newid())) * (@eMax - @eMin + 1)) as int) + @eMin) F

            truncate table #Inserted2
        end
    end

    set @levelNo = @levelNo + 1
end
但是,这样仍无法控制树中元素的确切数量,并且叶节点只会出现在最后一层。最好再增加一个参数来控制各层级的填充率(即同一层级中拥有子节点的元素所占的百分比)。
-- Per-level configuration for the tree generator, extended with a population
-- percentage. Replaces any #TreeLevels that already exists in this session
-- (for example the two-column variant), so the definition can be run again.
if object_id('tempdb..#TreeLevels') is not null
    drop table #TreeLevels

create table #TreeLevels
(
    -- 1-based level number, assigned automatically in insertion order
    LevelNo int identity(1, 1) not NULL,
    -- lower and upper bound for the random number of elements generated
    -- on this level (per parent element; for level 1, for the level as a whole)
    MinElements int not NULL,
    MaxElements int not NULL,
    -- percentage of this level's elements that receive children on the next level;
    -- NULL means all of them. It is used as a TOP (...) PERCENT argument, which
    -- only accepts 0..100, so invalid values are rejected on insert.
    PopulatedPct float NULL check (PopulatedPct between 0 and 100),
    primary key clustered (LevelNo)
)
样本分布:
-- Sample distribution with seven levels. LevelNo is an identity column, so the
-- levels are numbered 1..7 in the order of these statements. PopulatedPct is the
-- percentage of that level's elements that get children on the next level
-- (NULL = all of them); on the last level it is never read.
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (1, 1, NULL);    -- level 1
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (9, 15, NULL);   -- level 2
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (10, 12, NULL);  -- level 3
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (9, 15, 80);     -- level 4
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (10, 12, 65);    -- level 5
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (9, 15, 35);     -- level 6
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (10, 12, NULL);  -- level 7
PopulatedPct 为 NULL 时按 100% 处理。PopulatedPct 控制下一层级的填充,因此在循环中应从上一层级的行读取。所以它对 #TreeLevels 中的最后一行没有任何意义。
现在我们可以在考虑 PopulatedPct 的情况下逐级循环:
-- Fills TestTree level by level, driven by the rows of #TreeLevels, including
-- the PopulatedPct setting.
--
-- Level 1 rows are inserted with ParentID = NULL. For every later level, the
-- PopulatedPct of the PREVIOUS level decides which share of that level's elements
-- become parents: NULL means all of them, otherwise a random TOP (@pct) PERCENT
-- sample. Each chosen parent receives a random number of children between
-- MinElements and MaxElements of the current level.
--
-- The IDs created on the previous level are kept in one of two work tables
-- (#Inserted / #Inserted2). The tables swap roles on every level: the non-empty
-- one is read as the parent list, the other receives the new IDs through the
-- OUTPUT clause, and the parent list is then truncated. This avoids copying IDs.
--
-- The child count is computed as floor(rand * (max - min + 1)) + min so that every
-- value in [min, max] is equally likely (rounding rand * (max - min) + min would
-- give the two endpoints only half the probability of the interior values).
-- Assumes MinElements <= MaxElements and PopulatedPct within 0..100.
-- NOTE(review): the random expression is passed to an inline function; how often
-- the engine evaluates it (per parent or otherwise) is up to the optimizer — verify
-- the resulting distribution on the target server.
declare @levelNo int, @eMin int, @eMax int
declare @pct float

-- Remove work tables left over from an earlier run in this session so the
-- script can be executed again.
if object_id('tempdb..#Inserted') is not null
    drop table #Inserted
if object_id('tempdb..#Inserted2') is not null
    drop table #Inserted2

create table #Inserted (ID int not NULL, primary key nonclustered (ID))
create table #Inserted2 (ID int not NULL, primary key nonclustered (ID))

set @levelNo = 1
while 1=1
begin
    -- Load the settings of the current level; stop when no such level is defined.
    select @eMin = MinElements, @eMax = MaxElements from #TreeLevels where LevelNo = @levelNo
    if @@ROWCOUNT = 0
        break

    if @levelNo = 1
    begin
        -- Root level: no parents, elements are created with ParentID = NULL.
        insert into TestTree (ParentID)
        output inserted.ID into #Inserted (ID)
        select NULL
        from ftItziksCJCTE(cast(floor(rand(checksum(newid())) * (@eMax - @eMin + 1)) as int) + @eMin)
    end
    else
    begin
        -- Share of the previous level that gets children (NULL = every element).
        select @pct = PopulatedPct from #TreeLevels where LevelNo = @levelNo - 1

        if exists (select 1 from #Inserted)
        begin
            -- Parents are in #Inserted; the new IDs go to #Inserted2.
            if (@pct is NULL)
                insert into TestTree (ParentID)
                output inserted.ID into #Inserted2 (ID)
                select
                    I.ID
                from
                    #Inserted I
                    cross apply ftItziksCJCTE(cast(floor(rand(checksum(newid())) * (@eMax - @eMin + 1)) as int) + @eMin) F
            else
                insert into TestTree (ParentID)
                output inserted.ID into #Inserted2 (ID)
                select
                    I.ID
                from
                    -- random sample of the previous level
                    (select top (@pct) PERCENT ID from #Inserted order by newid()) I
                    cross apply ftItziksCJCTE(cast(floor(rand(checksum(newid())) * (@eMax - @eMin + 1)) as int) + @eMin) F

            truncate table #Inserted
        end
        else
        begin
            -- Parents are in #Inserted2; the new IDs go to #Inserted.
            if (@pct is NULL)
                insert into TestTree (ParentID)
                output inserted.ID into #Inserted (ID)
                select
                    I.ID
                from
                    #Inserted2 I
                    cross apply ftItziksCJCTE(cast(floor(rand(checksum(newid())) * (@eMax - @eMin + 1)) as int) + @eMin) F
            else
                insert into TestTree (ParentID)
                output inserted.ID into #Inserted (ID)
                select
                    I.ID
                from
                    -- random sample of the previous level
                    (select top (@pct) PERCENT ID from #Inserted2 order by newid()) I
                    cross apply ftItziksCJCTE(cast(floor(rand(checksum(newid())) * (@eMax - @eMin + 1)) as int) + @eMin) F

            truncate table #Inserted2
        end
    end

    set @levelNo = @levelNo + 1
end
仍然无法控制元素的确切数量,但可以更好地控制树的形状。