-2

我有这个:

from os import path
# NOTE(review): this value is a glob pattern rather than a directory, yet it is
# passed as the first argument to path.join() in the loop below.
base_path = "C:\\texts\\*.txt"
# NOTE(review): `files` is not defined in this snippet; presumably a list of
# input file paths -- confirm against the rest of the script.
for file in files:
   # Read from `file` and write to "<file>_tokenized.txt" joined onto base_path.
   # NOTE(review): if `file` is an absolute path, path.join() discards
   # base_path, which would explain the output landing next to the input -- confirm.
   with open (file) as in_file, open(path.join(base_path,"%s_tokenized.txt" % file),   "w") as out_file:
       data = in_file.readlines()
       for line in data:
           # Split the line on whitespace and re-join the tokens with commas,
           # writing one output line per input line.
           words = line.split()
           str1 = ','.join(words)
           out_file.write(str1)
           out_file.write("\n")

它会在读取文件的同一目录中生成标记化后的文件。如何将这些输出文件(out_file)写入另一个目录,例如 "C:\\texts\\Tokenized"?

我知道可以在生成这些新文件之后再将它们移动到其他目录,但我想知道的是:能否在上面的代码生成新文件的同时,直接将它们输出到其他目录?

4

2 回答 2

0

这就是我输出到任意目录中文件的方式:

import os

# Directory that will hold the output file; created below when missing.
dir_name = "../some_dir"
if not os.path.exists(dir_name):
    os.makedirs(dir_name)
# os.path.join inserts the platform's separator instead of a hard-coded '/'.
out_file_name = os.path.join(dir_name, 'out.txt')
# The handle is left open for subsequent writes; call out_file.close() when
# finished (or wrap the writes in a `with open(...)` block).
out_file = open(out_file_name, 'w')

编辑 :

# `something_from_tokenizing` is a placeholder for whatever identifies the
# input; `dir_name` is the target directory (e.g. "../some_dir").
file_name = "{0}_tokenized.txt".format(something_from_tokenizing)
if not os.path.exists(dir_name):
    os.makedirs(dir_name)
# Join with os.path.join: plain concatenation drops the separator between the
# directory and the file name (yielding e.g. "../some_dirfoo_tokenized.txt").
out_file_name = os.path.join(dir_name, file_name)

编辑 :

我刚试过,这对我有效。你只需要两个路径:一个用于源目录,一个用于目标目录。希望这对你有帮助。

import os
from os import path
# Names of the input files, looked up inside base_path.
files = ["in.txt", "out.txt"]
base_path = "."           # source directory the inputs are read from
dir_name = "./some_dir"   # destination directory for the tokenized copies
if not os.path.exists(dir_name):
    os.makedirs(dir_name)
for file in files:
    # Two separate paths: read from the source directory, write to the
    # destination directory. The output keeps the "<name>_tokenized.txt" form.
    source_name = path.join(base_path, file)
    target_name = path.join(dir_name, "%s_tokenized.txt" % file)
    with open(source_name) as in_file, open(target_name, "w") as out_file:
        # Stream line by line instead of loading the whole file; each line is
        # split on whitespace and written back comma-separated.
        for line in in_file:
            out_file.write(','.join(line.split()) + "\n")
于 2013-11-06T04:21:12.483 回答
0

这是你要找的:

import os
import glob
source_pattern = 'c:/texts/*.txt'
output_directory = 'c:/texts/tokenized'

# open(..., 'w') does not create missing directories, so make sure the
# destination exists before writing anything into it.
if not os.path.isdir(output_directory):
    os.makedirs(output_directory)

# Iterate over files matching source_pattern
for input_file in glob.glob(source_pattern):

    # Build the output filename: "<base>_tokenized<ext>" inside output_directory
    base, ext = os.path.splitext(os.path.basename(input_file))
    output_file = os.path.join(output_directory, base + '_tokenized' + ext)

    with open(input_file) as in_file, open(output_file, 'w') as out_file:
        # Split each line on whitespace and write the tokens comma-separated.
        for line in in_file:
            out_file.write(','.join(line.split()) + '\n')
于 2013-11-06T05:36:02.913 回答