0

我想调查一组目录并获取以下信息

  1. 每种文件类型的文件数量
  2. 按文件类型分组、带完整路径的文件列表

  3. 对每个子目录分别给出第 1、2 项

我有以下代码。生成扩展名列表(extList)的部分工作正常。我卡在如何为每个扩展名动态创建对应的列表和计数器变量名上——事先并不知道会有哪些扩展名,也不知道有多少个。解决这个问题之后还会遇到什么其他问题,我也不清楚。

import os, sys, datetime

# Survey script from the question: walk SourceDIR, collect every file path and
# every distinct extension, then (attempt to) keep a counter and a file list
# per extension.
top = os.getcwd() # change to a specific path if required.
RootOutput = top
SourceDIR = top
outDIR = top+"\\workingFiles" # directory where output is written to. Includes temp files
# END setting base paths
# NOTHING BELOW should need editing.
List =[]     # full path of every file found
extList=[]   # distinct extensions, in first-seen order

os.chdir(top)

# Bottom-up walk (topdown=False): sub-directories are visited before their parent.
for root, dirs, files in os.walk(SourceDIR, topdown=False):
    for fl in files:
      currentFile=os.path.join(root, fl)
      # Slice from the last '.', so the extension keeps its leading dot.
      # A name with no '.' makes rfind return -1, which yields the last character.
      ext=fl[fl.rfind('.'):]
      if ext not in extList:
        extList.append(ext)
      List.append(currentFile)

print extList

# NOTE: the two assignments below are not valid Python. The target of an
# assignment must be a name, attribute or subscript, not an expression such
# as ext+"Counter", so the whole script is rejected with a SyntaxError.
# This is the problem the question asks about.
for ext in extList:
    ext+"Counter"=0
    ext+"FileList"=[]

for fl in List:
    # Here the extension is taken from the full path, not the bare file name.
    ext=fl[fl.rfind('.'):]
    # Invalid for the same reason: an augmented assignment cannot target an expression.
    ext+"Counter"+=1
    ext+"FileList".append(fl)

for ext in extList:
    print ext
    # These print the concatenated strings themselves, not a counter or a list.
    print ext+"Counter"
    print ext+"FileList"

已根据答案更新代码。现在的问题出在 txt 日志文件上:它只创建了一个文本文件。

# Report every extension on stdout and append its files, numbered from 1,
# to a per-extension log named "<extension>_Log.txt" in the current directory.
for elem in ext_dict:
    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(elem)
    print(ext_dict[elem]["Counter"])
    print(ext_dict[elem]["FileList"])
    # 'with' closes the log even if a write raises part-way through.
    with open(elem+'_Log.txt', 'a') as log:
        for Num, fl in enumerate(ext_dict[elem]["FileList"], 1):
            log.write(str(Num)+","+str(fl)+"\n")

任何人都可以使用的最终脚本如下。

#-------------------------------------------------------------------------------
# Name:    File_Review
# Purpose: Review of all files in directory/subdirectories with report on file type and size
#
# Author:      georgec
#
# Created:     25/01/2013
# Copyright:   (c) ATGIS 2013
# Licence:     Creative Commons 3.0 - BY
#-------------------------------------------------------------------------------

import os, sys, datetime

# Base paths for the survey. Edit the values in this section only.
top = os.getcwd() # change to a specific path if required.
RootOutput = top  # not referenced again in the visible part of this script
SourceDIR = top
# Overrides the assignment on the previous line: this is the tree that gets walked.
SourceDIR = r'P:\2013'
outDIR = top # directory where output is written to. Includes temp files
finalDIR = top+"\\final" # folder for final data only
# One survey run is made per entry; each entry is a substring a file's full
# path must contain to be listed. The empty string is contained in every path.
DirLimiterList=['']

# END setting base paths
# NOTHING BELOW should need editing.

# Make 'top' the working directory; log files are opened with relative names.
os.chdir(top)

def InvestigateFiles(SourceDIR,outDIR,DirLimiter):
    """Survey the files under SourceDIR and log them grouped by extension.

    The tree is walked bottom-up. Every extension encountered is printed as a
    list; then, for each extension, its name, the number of matching files and
    the list of matching files are printed, and the matching files are
    appended to a log file in the current working directory named
    ``<ext>_<tag>_Log.txt``, where ``<tag>`` is the text between the first and
    last backslash of DirLimiter. Each log line has the form
    ``<running number>;<full path>;<size in bytes>``.

    Parameters:
        SourceDIR:  root directory to walk.
        outDIR:     accepted for interface compatibility; not used here.
                    NOTE(review): logs go to the current working directory,
                    which the calling script sets to the same location.
        DirLimiter: substring a file's full path must contain for the file to
                    be listed. The empty string matches every file.

    Returns:
        None.

    Raises:
        OSError/IOError if a log file cannot be opened or a listed file's
        size cannot be read.

    Files whose name has no '.' or ends with '.' have no extension and are
    skipped. An extension seen only on files that do not match DirLimiter is
    still reported and still gets an (empty) log file.
    """
    extList = []       # extensions in first-seen order, for deterministic output
    files_by_ext = {}  # extension -> matching full paths, in walk order

    for root, dirs, files in os.walk(SourceDIR, topdown=False):
        for fl in files:
            # Take the extension from the bare file name, once, so dots in
            # directory names can never leak into it. rpartition returns an
            # empty separator when the name contains no '.' at all.
            stem, dot, ext = fl.rpartition('.')
            if not dot or not ext:
                continue
            if ext not in files_by_ext:
                files_by_ext[ext] = []
                extList.append(ext)
            currentFile = os.path.join(root, fl)
            if DirLimiter in currentFile:
                files_by_ext[ext].append(currentFile)

    # print(x) with a single argument behaves the same on Python 2 and 3.
    print(extList)

    # Loop-invariant part of the log name; for DirLimiter '' this is ''.
    limiter_tag = DirLimiter[DirLimiter.find('\\')+1:DirLimiter.rfind('\\')]

    for elem in extList:
        matched = files_by_ext[elem]
        print(elem)
        print(len(matched))
        print(matched)
        # Append mode keeps earlier runs; 'with' closes the log even if
        # getsize() or write() raises part-way through.
        with open(elem+'_'+limiter_tag+'_Log.txt', 'a') as log:
            for Num, fl in enumerate(matched, 1):
                log.write(str(Num)+";"+str(fl)+";"+str(os.path.getsize(fl))+"\n")

# Run the survey once for each path filter listed in DirLimiterList.
for DirLimiter in DirLimiterList:
 InvestigateFiles(SourceDIR,outDIR,DirLimiter)
4

1 回答 1

1

您应该使用字典来存储数据

# Map each extension to a record holding its running count and its files.
ext_dict = {}

# Create the dictionary: one empty record per known extension.
for ext in extList:
    ext_dict[ext] = {"Counter": 0, "FileList": []}

# Populate the dictionary.
for fl in List:
    # Same slicing as used to build extList, so the keys line up.
    ext = fl[fl.rfind('.'):]
    # 'in' works on Python 2 and 3; dict.has_key() was removed in Python 3.
    if ext in ext_dict:
        ext_dict[ext]["Counter"] += 1
        ext_dict[ext]["FileList"].append(fl)

# Iterate over the dictionary keys and report each record.
# The key is "Counter" with a capital C, matching the record created above.
for elem in ext_dict:
    print(elem)
    print(ext_dict[elem]["Counter"])
    print(ext_dict[elem]["FileList"])
于 2013-01-31T04:08:47.030 回答