我想遍历一组目录,并获取以下信息:
- 每种文件类型的文件数量
- 按文件类型列出的文件清单(含完整路径)
- 以上信息按每个子目录(目录 1 和目录 2)分别给出
我有以下代码。生成扩展名列表(extList)的部分工作正常。我卡在如何为每个扩展名对应的列表和计数器命名上:事先并不知道会出现哪些扩展名,也不知道会有多少个。我也不确定解决这一点之后还会遇到什么其他问题。
import os, sys, datetime
top = os.getcwd()  # change to a specific path if required.
RootOutput = top
SourceDIR = top
# Directory where output is written to. Includes temp files.
outDIR = os.path.join(top, "workingFiles")
# END setting base paths
# NOTHING BELOW should need editing.

List = []     # full path of every file found under SourceDIR
extList = []  # distinct extensions (leading dot included), first-seen order
# Variable names cannot be assembled from strings at run time, so the
# per-extension counter and file list are stored in a dictionary instead:
#   ext_dict[ext] == {"Counter": <int>, "FileList": [<full path>, ...]}
ext_dict = {}

os.chdir(top)
for root, dirs, files in os.walk(SourceDIR, topdown=False):
    for fl in files:
        currentFile = os.path.join(root, fl)
        # Derive the extension from the bare file name so that a dot in a
        # directory name cannot leak into it; names without any dot are
        # grouped under the empty string.
        dot = fl.rfind('.')
        ext = fl[dot:] if dot >= 0 else ''
        if ext not in ext_dict:
            extList.append(ext)
            ext_dict[ext] = {"Counter": 0, "FileList": []}
        List.append(currentFile)
        ext_dict[ext]["Counter"] += 1
        ext_dict[ext]["FileList"].append(currentFile)

print(extList)

# Report the count and the file list for every extension.
for ext in extList:
    print(ext)
    print(ext_dict[ext]["Counter"])
    print(ext_dict[ext]["FileList"])
根据答案更新后的代码如下。写入 txt 日志文件时还有一个问题:它只创建了一个文本文件。
# Report every extension in ext_dict and write one numbered log file for it.
for elem in ext_dict:
    print(elem)
    print(ext_dict[elem]["Counter"])
    print(ext_dict[elem]["FileList"])
    # The log is opened inside the loop so that each extension gets its own
    # "<ext>_Log.txt"; the with-block closes it even if a write fails.
    with open(elem + '_Log.txt', 'a') as log:
        for Num, fl in enumerate(ext_dict[elem]["FileList"], 1):
            log.write(str(Num) + "," + str(fl) + "\n")
任何人都可以使用的最终脚本如下。
#-------------------------------------------------------------------------------
# Name: File_Review
# Purpose: Review of all files in directory/subdirectories with report on file type and size
#
# Author: georgec
#
# Created: 25/01/2013
# Copyright: (c) ATGIS 2013
# Licence: Creative Commons 3.0 - BY
#-------------------------------------------------------------------------------
import os, sys, datetime
top = os.getcwd()  # change to a specific path if required.
RootOutput = top
SourceDIR = top
SourceDIR = r'P:\2013'  # overrides the default on the previous line
outDIR = top  # directory where output is written to. Includes temp files
# Folder for final data only; os.path.join keeps the separator portable.
finalDIR = os.path.join(top, "final")
# Each entry is passed to InvestigateFiles as DirLimiter; only paths that
# contain it are reported. The empty string matches every path.
DirLimiterList = ['']
# END setting base paths
# NOTHING BELOW should need editing.
os.chdir(top)
def InvestigateFiles(SourceDIR, outDIR, DirLimiter):
    """Group the files below SourceDIR by extension and log each group.

    The tree is walked bottom-up. A file is included when its name has a
    non-empty extension (the text after the last dot) and its full path
    contains DirLimiter. For every extension the name, the file count and
    the file list are printed, and the lines "<n>;<full path>;<size>" are
    appended to "<ext>_<label>_Log.txt", where <label> is the part of
    DirLimiter between its first and last backslash.

    Parameters:
        SourceDIR:  root directory to scan.
        outDIR:     directory the log files are written to; an empty value
                    falls back to the current working directory.
        DirLimiter: substring a path must contain to be reported; the empty
                    string matches every path.

    Returns:
        dict mapping extension -> {"Counter": int, "FileList": [paths]}.

    Raises:
        OSError / IOError if a log file cannot be opened or a file size
        cannot be read.
    """
    extList = []   # extensions in first-seen order, printed below
    ext_dict = {}  # extension -> {"Counter": n, "FileList": [...]}

    for root, dirs, files in os.walk(SourceDIR, topdown=False):
        for fl in files:
            dot = fl.rfind('.')
            if dot < 0:
                # No dot at all: without this guard the whole file name
                # would be mistaken for an extension.
                continue
            ext = fl[dot + 1:]
            if ext == '':
                continue
            currentFile = os.path.join(root, fl)
            if DirLimiter not in currentFile:
                continue
            if ext not in ext_dict:
                extList.append(ext)
                ext_dict[ext] = {"Counter": 0, "FileList": []}
            ext_dict[ext]["Counter"] += 1
            ext_dict[ext]["FileList"].append(currentFile)

    print(extList)

    # The label is the same for every log file, so compute it once.
    label = DirLimiter[DirLimiter.find('\\') + 1:DirLimiter.rfind('\\')]

    for elem in extList:
        print(elem)
        print(ext_dict[elem]["Counter"])
        print(ext_dict[elem]["FileList"])
        logName = elem + '_' + label + '_Log.txt'
        logPath = os.path.join(outDIR, logName) if outDIR else logName
        # Append mode keeps earlier runs; the with-block guarantees the
        # handle is closed even if getsize() or write() raises.
        with open(logPath, 'a') as log:
            for Num, fl in enumerate(ext_dict[elem]["FileList"], 1):
                log.write(str(Num) + ";" + str(fl) + ";"
                          + str(os.path.getsize(fl)) + "\n")

    return ext_dict
# Run one investigation per configured directory filter.
for DirLimiter in DirLimiterList:
    InvestigateFiles(SourceDIR, outDIR, DirLimiter)