5

这可能是一个令人困惑的问题,但我在目录爬虫下面写了,它将从根爬虫开始,找到所有唯一目录,然后找到所有文件并计算它们并添加它们的文件大小。但是,我编写它的方式需要访问目录两次,一次是查找目录,下一次是计算文件。如果/如何一次获得所有信息?

       Stopwatch stopwatch = new Stopwatch();
        stopwatch.Start();
        HashSet<string> DirectoryHolding = new HashSet<string>();
        DirectoryHolding.Add(rootDirectory);


        #region All Directory Region
        int DirectoryCount = 0;
        int DirectoryHop = 0;
        bool FindAllDirectoriesbool = true;
        while (FindAllDirectoriesbool == true)
        {
            string[] DirectoryHolder = Directory.GetDirectories(rootDirectory);
            if (DirectoryHolder.Length == 0)
            {
                if (DirectoryHop >= DirectoryHolding.Count())
                {
                    FindAllDirectoriesbool = false;
                }
                else
                {
                    rootDirectory = DirectoryHolding.ElementAt(DirectoryHop);
                }
                DirectoryHop++;
            }
            else
            {
                foreach (string DH in DirectoryHolder)
                {
                    DirectoryHolding.Add(DH);
                }
                if (DirectoryHop > DirectoryHolding.Count())
                {
                    FindAllDirectoriesbool = false;
                }
                rootDirectory = DirectoryHolding.ElementAt(DirectoryHop);
                DirectoryHop++;

            }

        }
        DirectoryCount = DirectoryHop - 2;
        #endregion



        #region File Count and Size Region
        int FileCount = 0;
        long FileSize = 0;
        for (int i = 0; i < DirectoryHolding.Count ; i++)
        {
            string[] DirectoryInfo = Directory.GetFiles(DirectoryHolding.ElementAt(i));
            for (int fi = 0; fi < DirectoryInfo.Length; fi++)
            {
                try
                {
                    FileInfo fInfo = new FileInfo(DirectoryInfo[fi]);
                    FileCount++;
                    FileSize = FileSize + fInfo.Length;
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message.ToString());
                }
            }
        }

秒表结果为 1.38

int FileCount = 0;
        long FileSize = 0;
        for (int i = 0; i < DirectoryHolding.Count; i++)
        {
            var entries = new DirectoryInfo(DirectoryHolding.ElementAt(i)).EnumerateFileSystemInfos();
            foreach (var entry in entries)
            {
                if ((entry.Attributes & FileAttributes.Directory) == FileAttributes.Directory)
                {
                    DirectoryHolding.Add(entry.FullName);
                }
                else
                {
                    FileCount++;
                    FileSize = FileSize + new FileInfo(entry.FullName).Length;
                }
            }
        }

此方法的秒表是 2.01,

这对我来说毫无意义。

 DirectoryInfo Dinfo = new DirectoryInfo(rootDirectory);
            DirectoryInfo[] directories = Dinfo.GetDirectories("*.*", SearchOption.AllDirectories);
            FileInfo[] finfo = Dinfo.GetFiles("*.*", SearchOption.AllDirectories);
            foreach (FileInfo f in finfo)
            {
                FileSize = FileSize + f.Length;
            }
            FileCount = finfo.Length;
            DirectoryCount = directories.Length; 

.26 秒我认为这是赢家

4

2 回答 2

8

您可以使用Directory.EnumerateFileSystemEntries()

var entries = Directory.EnumerateFileSystemEntries(rootDirectory);
foreach (var entry in entries)
{
    if(File.Exists(entry))
    {
        //file
    }
    else
    {
        //directory
    }
}

或者DirectoryInfo.EnumerateFileSystemInfos()(这可能会更高效,因为FileSystemInfo已经拥有您需要的大部分信息,您可以跳过File.Exists检查):

var entries = new DirectoryInfo(rootDirectory).EnumerateFileSystemInfos();
foreach (var entry in entries)
{
    if ((entry.Attributes & FileAttributes.Directory) == FileAttributes.Directory)
    {
        //direcotry
    }
    else
    {
        //file
    }
}
于 2012-03-28T18:22:01.943 回答
2

通常的做法是编写递归方法。这是伪代码:

void ProcessDirectory(Dir directory)
{
    foreach (var file in directory.Files)
        ProcessFile(file);

    foreach (var child in directory.Subdirectories)
        ProcessDirectory(directory);
}

您还可以颠倒 foreach 循环的顺序。例如,要使用递归方法计算所有文件的总大小,您可以这样做:

int GetTotalFileSize(Dir directory)
{
    ulong result = 0UL;

    foreach (var child in directory.Subdirectories)
        result += GetTotalFileSize(directory);

    foreach (var file in directory.Files)
        result += file.Length;

    return result;
}
于 2012-03-28T18:32:17.267 回答