4

SQL Server 有一个未公开文档的扩展存储过程,名为 xp_dirtree,它可以以表格形式返回指定目录下所有文件和文件夹的名称(包括子文件夹中的内容)。为了加深对递归 CTE 的理解,我决定用它来获取指定文件夹(包括子文件夹)中所有文件的完整路径。但是,绞尽脑汁想了一个小时,我仍然没有找到正确的写法。下面是我目前写出的代码。这个目标可以用递归 CTE 实现吗?

-- Root folder to scan. It is normalized below so that it always ends with a
-- backslash, which lets a file path be built as @dir + <relative path>.
DECLARE @dir NVARCHAR(260) = N'c:\temp';

IF RIGHT(@dir, 1) <> N'\'
    SET @dir = @dir + N'\';

-- Recreate the staging table on every run so the script can be re-executed
-- in the same session.
IF OBJECT_ID('tempdb..#dirtree', 'U') IS NOT NULL
    DROP TABLE #dirtree;

CREATE TABLE #dirtree
(
    id           INT IDENTITY PRIMARY KEY,  -- records the order in which xp_dirtree returned the rows
    subdirectory NVARCHAR(260),             -- bare file or folder name (no path)
    depth        INT,                       -- nesting level as reported by xp_dirtree
    is_file      BIT                        -- 1 = file, 0 = folder
);

-- Name the target columns explicitly instead of relying on the identity
-- column being skipped implicitly.
-- Arguments: folder, depth (0 here), include-files flag (1).
INSERT INTO #dirtree (subdirectory, depth, is_file)
EXEC master.sys.xp_dirtree
    @dir,
    0,
    1;

-- Show the raw listing in insertion order.
SELECT
    id,
    subdirectory,
    depth,
    is_file
FROM #dirtree
ORDER BY id;

-- Unfinished attempt at a recursive CTE that should produce the full path of
-- every file listed in #dirtree.
WITH    files
          AS (
              -- Anchor member: file rows whose depth is not 1; the path column
              -- starts out as the bare name.
              SELECT    id,
                        subdirectory,
                        depth,
                        is_file, subdirectory AS path
              FROM      #dirtree
              WHERE     is_file = 1
                        AND depth <> 1
   UNION ALL
               -- Recursive member has not been written yet; it is the part the
               -- question asks about.
               -- ...
             )
    -- Return every row the CTE produces.
    SELECT  *
    FROM    files ;

假设 xp_dirtree 输出为:

/*
id  subdirectory   depth   is_file
--- -------------- ------- -------
1   abc.mdf        1       1
2   a              1       0
3   a.txt          2       1
4   b.txt          2       1
5   a.rb           1       1
6   aaa.flv        1       1
*/

我想要的是:

/*
path
------------------
c:\temp\abc.mdf
c:\temp\a\a.txt
c:\temp\a\b.txt
c:\temp\a.rb
c:\temp\aaa.flv
*/
4

4 回答 4

4

如果我理解得没错,你想要的是这样的东西:

测试数据:

-- Sample data mirroring the xp_dirtree output shown in the question.
-- parentId is not supplied here, so it starts out NULL for every row.
CREATE TABLE #dirtree (
    id           INT,
    subdirectory NVARCHAR(260),
    depth        INT,
    is_file      BIT,
    parentId     INT
);

INSERT INTO #dirtree (id, subdirectory, depth, is_file)
VALUES
    (1, 'abc.mdf', 1, 1),
    (2, 'a',       1, 0),
    (3, 'a.txt',   2, 1),
    (4, 'b.txt',   2, 1),
    (5, 'a.rb',    1, 1),
    (6, 'aaa.flv', 1, 1);

更新父 ID(parentId):

-- For each row, the parent is the closest preceding row (largest smaller id)
-- that sits exactly one level higher. Rows with no such row keep parentId NULL.
UPDATE child
SET parentId = (
        SELECT MAX(parent.id)
        FROM #dirtree AS parent
        WHERE parent.depth = child.depth - 1
          AND parent.id < child.id
    )
FROM #dirtree AS child

查询

-- Build the full path of every file by walking the tree from the top down.
-- Requires #dirtree.parentId to be populated: NULL for entries directly under
-- the root folder, otherwise the id of the containing directory.
-- Row order of the result is unspecified; add an ORDER BY if it matters.
;WITH CTE
AS
(
    -- Anchor: top-level entries only (files and directories alike).
    -- Seeding the recursion from every directory would let a nested directory
    -- start a second chain and emit a truncated duplicate, e.g. 'b\c.txt' in
    -- addition to the correct 'a\b\c.txt'.
    SELECT
        t.id,
        CAST(t.subdirectory AS NVARCHAR(4000)) AS subdirectory,
        t.depth,
        t.is_file
    FROM
        #dirtree AS t
    WHERE
        t.parentId IS NULL
    UNION ALL
    -- Recursive step: append each child to the path accumulated for its parent.
    -- The CAST keeps the column type identical to the anchor, as a recursive
    -- CTE requires.
    SELECT
        t.id,
        CAST(CTE.subdirectory + N'\' + t.subdirectory AS NVARCHAR(4000)),
        t.depth,
        t.is_file
    FROM
        #dirtree AS t
        JOIN CTE
            ON CTE.id = t.parentId
)
-- Every entry is reached exactly once, so no second branch is needed for
-- top-level files.
SELECT
    'c:\temp\' + CTE.subdirectory AS [path]
FROM
    CTE
WHERE
    CTE.is_file = 1

结果

path
---------------
c:\temp\a\a.txt
c:\temp\a\b.txt
c:\temp\abc.mdf
c:\temp\a.rb
c:\temp\aaa.flv

编辑

已将示例中使用的表修改得更接近您问题中的表。

于 2012-04-24T13:45:20.660 回答
0
/*
   Traverse a directory tree and return every file and folder together with
   its full path.

   xp_dirtree is called once per directory with depth = 1, so the parent path
   of each returned row is known when the row is inserted. Newly found
   directories are appended to @directories and expanded in rowid order.
*/

declare
   @dirRoot nvarchar(4000) = N'\\yourdir'   -- folder to scan, without a trailing backslash

declare
   @idx       int,              -- rowid of the directory currently being expanded
   @dirSearch nvarchar(4000)    -- full path of that directory

declare @directories table(
   directoryName nvarchar(255),     -- bare name as returned by xp_dirtree
   depth         int,               -- depth as returned by xp_dirtree
   isfile        int,               -- 0 = directory, anything else = file
   rootName      nvarchar(4000),    -- full path; NULL until computed
   rowid         int identity(1,1)
)

-- list the root folder itself
insert into @directories(directoryName, depth, isfile)
exec master.sys.xp_dirtree @dirRoot, 1, 1

if not exists(select * from @directories)
   return

update @directories
set rootName = @dirRoot + N'\' + directoryName

-- first directory to expand; file rows are skipped because they cannot have children
select @idx = min(rowid) from @directories where isfile = 0

while @idx is not null
begin

   select @dirSearch = rootName
   from @directories
   where rowid = @idx

   insert into @directories(directoryName, depth, isfile)
   exec master.sys.xp_dirtree @dirSearch, 1, 1

   -- only the rows just inserted still have a NULL rootName
   update @directories
   set rootName = @dirSearch + N'\' + directoryName
   where rootName is null

   -- advance to the next directory that has not been expanded yet;
   -- NULL (none left) ends the loop
   set @idx = (select min(rowid) from @directories where rowid > @idx and isfile = 0)

end

  select
    case isfile when 0 then 'Directory' else 'File' end as [attribute],
    rootName as [filePath]
  from @directories
  order by filePath
于 2013-08-06T21:12:46.160 回答
0

差不多九年过去了,不幸的是,据我所知仍然没有开箱即用的解决方案。所以我仍在研究 xp_dirtree,并且需要一个解决方案。

我尝试了 Arion 的答案,发现它确实能得出结果。但是,对于超过 11K 个对象的大型文件系统,它运行得非常慢。我发现它从最开始的这一步就已经非常慢了:

-- Parent lookup: the correlated subquery searches #dirtree once per row for the
-- closest preceding row that is exactly one level higher.
UPDATE cur
SET parentId = (
        SELECT MAX(prev.id)
        FROM #dirtree AS prev
        WHERE prev.id < cur.id
          AND prev.depth = cur.depth - 1
    )
FROM #dirtree AS cur

尽管这并不是“间隙与孤岛”(gaps and islands)问题,但两者有一些相似之处,借鉴那类问题的思路对我很有帮助。文末的代码就是我的存储过程,各个部分都附有注释说明其作用。

你会像这样使用它:

-- Run the procedure; the listing is written to the global temp table ##results.
execute directory
    @outputTable = '##results',
    @root        = 'c:\somepath',
    @depth       = 3;

-- Read the listing back.
select  *
from    ##results;

这导致输出如下:

+---------------------------------+------------+------------+-----------+--------+-----------+----------+
| path                            | name       | nameNoExt  | extension | isFile | runtimeId | parentId |
+---------------------------------+------------+------------+-----------+--------+-----------+----------+
| c:\somePath\DataMovers          | DataMovers | DataMovers | NULL      | 0      | 4854      | NULL     |
| c:\somePath\DataMovers\main.ps1 | main.ps1   | main       | ps1       | 1      | 4859      | 4854     |
+---------------------------------+------------+------------+-----------+--------+-----------+----------+

我必须以这种方式构建它,因为在内部它需要用 insert-exec 把 xp_dirtree 的输出加载到临时表中。由于 SQL Server 禁止嵌套的 insert-exec 语句,调用方无法再用 insert-exec 把该存储过程的结果加载到过程之外的表中,所以结果改为写入 @outputTable 指定的全局临时表。不要将 @outputTable 暴露给不受信任的用户,因为它会被拼接进动态 SQL,可能受到 SQL 注入的影响。当然,您可以改写该存储过程来避免这一点,不过它目前的形式已经可以满足需求。

/*

    Summary:        Lists file directory contents with full paths.

    Parameters:     @root        - folder to list; a trailing backslash is
                                   optional.
                    @depth       - passed unchanged to xp_dirtree as its depth
                                   argument.
                    @outputTable - name of the global temp table that receives
                                   the result. Must start with '##', contain
                                   no spaces and not exist yet.

    Output:         @outputTable with the columns path, name, nameNoExt,
                    extension, isFile, runtimeId and parentId.

    Raises:         50000 when @outputTable is invalid or already exists.

    Remarks:        - stackoverflow.com/q/10298910
                    - This assumes that the tree is put in order where
                      subfolders are listed right under their parent
                      folders.  If this changes in the future, a
                      different logic will need to be implemented.
                    - The result is written to a table because the procedure
                      itself uses insert-exec, which cannot be nested.
                    - extension is everything after the FIRST dot of a file
                      name, so 'a.tar.gz' yields 'tar.gz'.

    Example:        exec directory 'c:\somepath', 3, '##results';
                    select * from ##results;

*/
create procedure directory
    @root nvarchar(255),
    @depth int,
    @outputTable sysname
as

-- initializations

    if @outputTable is null or not (left(@outputTable,2) = '##') or charindex(' ', @outputTable) > 0
        throw 50000, '@outputTable must be a global temp table with no spaces in the name.', 1;

    if exists (select 0 from tempdb.information_schema.tables where table_name = @outputTable)
    begin
        declare @msg nvarchar(255) = '''tempdb.dbo.' + @outputTable + ''' already exists.';
        throw 50000, @msg, 1;
    end

    -- root with exactly one trailing backslash, so that 'c:\x' and 'c:\x\'
    -- both produce 'c:\x\...' paths instead of a doubled separator
    declare @rootPrefix nvarchar(256) = iif(right(@root, 1) = '\', @root, @root + '\');

-- fetch the tree (it doesn't have full path names)

    drop table if exists #dir;

    create table #dir (
        id int identity(1,1),       -- insertion order of the xp_dirtree rows
        parentId int null,          -- id of the containing folder; stays NULL for top-level items
        path nvarchar(4000),        -- bare name as returned by xp_dirtree
        depth int,
        isFile bit,
        isLeader int default(0),    -- 1 when the depth differs from the previous row
        groupId int                 -- running count of leaders; siblings in one run share it
    )

    insert      #dir (path, depth, isFile)
    exec        xp_dirtree @root, @depth, 1;

-- identify the group leaders (based on a change in depth)
-- the first row has no predecessor, so the comparison is not true and it is a leader too

    update  d
    set     isLeader = _isLeader
    from    (
                select  id,
                        isLeader,
                        _isLeader = iif(depth - lag(depth) over(order by id) = 0, 0, 1)
                from    #dir
            ) d;

-- find the parents for each leader (subsetting just for leaders improves efficiency)
-- parent = closest preceding row that is exactly one level higher

    update      #dir
    set         parentId = (
                    select  max(sub.id)
                    from    #dir sub
                    where   sub.depth = d.depth - 1
                    and     sub.id < d.id
                    and     d.isLeader = 1
                )
    from        #dir d
    where       d.isLeader = 1;

-- assign an identifier to each group (groups being objects that are 'siblings' of the leader)

    update      d
    set         groupId = _groupId
    from        (
                    select      *, _groupId = sum(isLeader) over(order by id)
                    from        #dir
                ) d;

-- set the parent id for each item based on the leader's parent id
-- groups whose leader has no parent (top level) are left with parentId NULL

    update      d
    set         d.parentId = leads.parentId
    from        #dir d
    join        #dir leads
                    on d.groupId = leads.groupId
                    and leads.parentId is not null;

-- convert the path names to full path names and calculate path parts

    drop table if exists #pathBuilderResults;

    with pathBuilder as (

        -- anchor: every item starts with its own name
        select      id, parentId, origId = id, path, pseudoDepth = depth
        from        #dir

        union all

        -- recursive step: move one level up, prefix the parent's name and
        -- count pseudoDepth down; the chain for an item stops at pseudoDepth 1
        select      par.id,
                    par.parentId,
                    pb.origId,
                    path = par.path + '\' + pb.path,
                    pseudoDepth = pb.pseudoDepth - 1
        from        pathBuilder pb
        join        #dir par on pb.parentId = par.id
        where       pb.pseudoDepth >= 2

    )

    -- rows with pseudoDepth = 1 carry the complete path relative to @root
    select      path = @rootPrefix + pb.path,
                name = d.path,
                nameNoExt = iif(ext.value is null, d.path, left(d.path, len(d.path) - len(ext.value) - 1)),
                extension = ext.value,
                d.isFile,
                runtimeId = pb.origId,
                parentId = d.parentId
    into        #pathBuilderResults
    from        pathBuilder pb
    join        #dir d on pb.origId = d.id
    cross apply (select value = charindex('.', d.path)) dotPos
    cross apply (select value = right(d.path, len(d.path) - dotPos.value)) pseudoExt
    cross apply (select value = iif(d.isFile = 1 and dotPos.value > 0, pseudoExt.value, null)) ext
    where       pb.pseudoDepth = 1
    order by    pb.origId;

-- terminate
-- the table name cannot be a parameter of the statement, so it is concatenated;
-- quotename() delimits it as a single identifier so it cannot inject further SQL

    declare @sql nvarchar(max) = N'select * into ' + quotename(@outputTable) + N' from #pathBuilderResults';
    exec (@sql);
于 2021-02-04T23:23:39.010 回答
-2

创建存储过程 sp_dirtree 后,按如下方式调用:exec sp_dirtree @Path = 'c:\', @FileOnly = 1

 create or alter proc sp_dirtree
       @Path      nvarchar(4000)
     , @Depth     int = 0
     , @FileOnly  bit = 0
    as
      /*
        Dir tree with fullpath. sergkog 2018-11-14

        Returns one row per entry found by xp_dirtree under @Path:
          FullPath - @Path followed by the entry's path relative to it
          IsFile   - 1 for files, 0 for folders

        @Path     - folder to list; a trailing separator is added if missing.
        @Depth    - passed unchanged to xp_dirtree as its depth argument.
        @FileOnly - 1 returns files only; any other value returns folders too.

        The parent lookup assumes xp_dirtree lists the contents of a folder
        directly after the folder itself.
      */
      set nocount on

      -- a forward slash anywhere in @Path selects '/' as the separator
      declare @Sep nchar(1) = iif(patindex('%/%',@Path) > 0,'/','\') -- windows or posix
      set @Path += iif(right(@Path,1) <> @Sep, @Sep,'')

    declare @dirtree table(
        Id int identity(1,1)
      , subdirectory nvarchar(4000) not null
      , depth    int  not null
      , is_file  bit  not null
      , parentId int  null
    )

    insert @dirtree(subdirectory, depth, is_file)
    exec xp_dirtree @Path, @Depth, 1

    -- parent = closest preceding row that is exactly one level higher;
    -- top-level rows keep parentId NULL
    update t1
       set parentId = (select max(p.Id) from @dirtree p where p.depth = t1.depth - 1 and p.Id < t1.Id)
      from @dirtree t1

    ;with cte as(
      -- anchor: top-level entries only. Starting from every folder would give
      -- nested folders a second chain and return their contents twice, once
      -- with a truncated path.
      select t.Id
           , t.subdirectory
           , t.depth
           , t.is_file
           , t.parentId
      from @dirtree t
      where t.parentId is null
     union all
      -- recursive step: append the child to the path built for its parent
      select t.Id
           , convert(nvarchar(4000), cte.subdirectory + @Sep + t.subdirectory)
           , t.depth
           , t.is_file
           , t.parentId
      from
        @dirtree t join cte on cte.Id = t.parentId
     )
     -- every entry is reached exactly once, so a single select is enough
     select @Path + cte.subdirectory as FullPath
          , cte.is_file as IsFile
     from cte
     where cte.is_file = iif(@FileOnly = 1, 1, cte.is_file)
    order by FullPath, IsFile
    go
于 2018-11-14T19:44:40.353 回答