0

我编写了以下脚本来匿名化 txt 文件中的电子邮件地址:

import io, os, sys
import re

def main():
    """Anonymize the e-mail addresses stored in ``emails.txt``.

    Reads ``emails.txt`` from the current working directory, replaces the
    text in front of every ``@<domain>.com`` / ``@<domain>.edu`` match with
    ``xxxx`` and writes the result back to the same file.

    Errors are reported on standard output instead of being raised.
    """
    try:
        # Read the whole file; the context manager closes the handle before
        # the same path is reopened for writing.
        with open('emails.txt', 'r') as myfile:
            content = myfile.read()

        # NOTE: ``.+`` is greedy and does not cross newlines, so everything
        # on the line in front of the matched ``@`` is replaced, not only
        # the local part of the address.
        content = re.sub(r'.+(?=@.+\.(com|edu))', "xxxx", content)

        # The file is truncated only after reading and substitution succeeded.
        with open('emails.txt', 'w') as myfile:
            myfile.write(content)

    except IOError:
        print('An error occured trying to read the file.')

    # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
    # interpreter shutdown are not silently swallowed.
    except Exception:
        print('An error occured.')

main()

我想知道如何让这个脚本处理某个目录及其所有子目录中的全部文件。

4

2 回答 2

1

os.walk() 正是你需要的。我修改了你的代码片段来演示用法:

#!/usr/bin/env python

import re
from os import walk
from os.path import join

def main(root='/path/to/root'):
    """Anonymize e-mail addresses in every file below ``root``.

    Walks ``root`` and all of its subdirectories, and in each file replaces
    the text in front of every ``@<domain>.com`` / ``@<domain>.edu`` match
    with ``xxxx``. Files are modified in place; files whose content does
    not change are left untouched.

    Args:
        root: Directory to start walking from. Defaults to the placeholder
            path used by the original snippet.
    """
    # Compile once; the same pattern is applied to every file.
    # NOTE: ``.+`` is greedy and does not cross newlines, so everything on
    # the line in front of the matched ``@`` is replaced.
    pattern = re.compile(r'.+(?=@.+\.(com|edu))')

    for dirpath, _, filenames in walk(root):
        for filename in filenames:
            # Build the path to the current file.
            path_to_file = join(dirpath, filename)

            with open(path_to_file, 'r') as myfile:
                # Function-call form works on both Python 2 and Python 3.
                print('Reading {0}'.format(path_to_file))
                content = myfile.read()

            anonymized = pattern.sub("xxxx", content)
            if anonymized == content:
                # Nothing matched: skip the rewrite.
                continue

            with open(path_to_file, 'w') as myfile:
                myfile.write(anonymized)

main()
于 2013-03-05T08:21:17.173 回答
0

使用 glob.glob

import io, os, sys
import re
import glob

def main(root='/path/to/root'):
    """Write anonymized copies of the files directly inside ``root``.

    For every regular file matched by ``root/*`` (subdirectories are
    skipped, not descended into), the text in front of every
    ``@<domain>.com`` / ``@<domain>.edu`` match is replaced with ``xxxx``.
    The result is written to a file whose name has ``.txt`` replaced by
    ``.new.txt``; a file name that does not contain ``.txt`` is therefore
    overwritten in place.

    Errors are reported on standard output instead of being raised; the
    first error stops the processing of the remaining files.

    Args:
        root: Directory whose files are processed. Defaults to the
            placeholder path used by the original snippet.
    """
    try:
        # glob.glob() returns the complete list up front, so the
        # ``.new.txt`` files created below are not picked up again while
        # this run is still iterating.
        for f in glob.glob(os.path.join(root, '*')):
            if not os.path.isfile(f):
                continue

            # Read the file's contents; the handle is closed on exit.
            with open(f, 'r') as myfile:
                content = myfile.read()

            # NOTE: ``.+`` is greedy and does not cross newlines, so
            # everything on the line in front of the matched ``@`` is
            # replaced.
            content = re.sub(r'.+(?=@.+\.(com|edu))', "xxxx", content)

            # Rename only the file-name component so that a directory
            # whose name contains ".txt" is left alone.
            directory, name = os.path.split(f)
            target = os.path.join(directory, name.replace('.txt', '.new.txt'))
            with open(target, 'w') as myfile:
                myfile.write(content)

    except IOError:
        print('An error occured trying to read the file.')
    # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
    # interpreter shutdown are not silently swallowed.
    except Exception:
        print('An error occured.')

main()
于 2013-03-05T09:02:42.383 回答