1

我还是 Java 语言和库的新手……我经常在 Python 中使用这种模式,想知道我应该如何用 Java 实现这个模式。

我需要逐行读取一个巨大的文件,并带有某种 xml 标记(我正在生成输入,所以我确信不会有任何歧义)

我想在大文件的某些部分中进行迭代,例如下面的 python 代码:

(使用 yield / Python 迭代器模式……在 Java 中有任何等价的写法吗?我真的很喜欢 for item in my collection: yield something_about(many items) 这种写法。)

实现这种行为的最佳(java)方法是什么?

谢谢

第一次编辑:顺便说一句,我也对 List 和 File 之间的类似对应关系感兴趣:从 Python 的角度来看,file 和 [python list,] 可以用同样的方式使用;当然,前提是 Java 中也能做到 => 答案:请参阅 Jeff Foster 的建议:使用 Apache IOUtils。

def myAcc(instream, start, end):
    """Yield blocks of consecutive lines delimited by marker prefixes.

    Each line of ``instream`` is right-stripped.  Collection begins at a line
    that starts with ``start`` and finishes at a line that starts with ``end``;
    both marker lines are part of the yielded block.  An end marker seen while
    nothing has been collected yields nothing.

    :param instream: iterable of strings (an open file, a list of lines, ...)
    :param start: prefix that opens a block
    :param end: prefix that closes a block
    :return: generator producing one list of stripped lines per block
    """
    block = []
    collecting = False
    for raw in instream:
        text = raw.rstrip()
        if not collecting and text.startswith(start):
            collecting = True
        if collecting:
            block.append(text)
        if not text.startswith(end):
            continue
        # End marker reached: stop collecting and hand over whatever was gathered.
        collecting = False
        if block:
            yield block
            block = []


# Demo driver for myAcc.  The sample data is kept inline, so no file is opened
# here: the original opened "c:/test.acc.txt" but never read or closed it
# before rebinding f.  With a real input, iterate over the file object instead,
# e.g. ``with open(path) as f: ...``.

s = """<c>
<a>
this is a test
</a>
<b language="en" />
</c>
<c>
<a>
ceci est un test
</a>
<b language="fr" />
</c>
<c>
<a>
esta es una prueba
</a>
<b language="es" />
</c>"""

f = s.split("\n")   # here mimic for a input file...

# Single-argument print(...) calls produce the same output on Python 2 and 3.
print("Reading block from <c> tag!")
for buf in myAcc(f, "<c>", "</c>"):
    print(buf)  # actually process this inner part... printing is for simplification
    print("-" * 10)

print("Reading block from <a> tag!")
for buf in myAcc(f, "<a>", "</a>"):
    print(buf)  # actually process this inner part...
    print("-" * 10)

输出 :

Reading block from <c> tag!
['<c>', '<a>', 'this is a test', '</a>', '<b language="en" />', '</c>']
----------
['<c>', '<a>', 'ceci est un test', '</a>', '<b language="fr" />', '</c>']
----------
['<c>', '<a>', 'esta es una prueba', '</a>', '<b language="es" />', '</c>']
----------
Reading block from <a> tag!
['<a>', 'this is a test', '</a>']
----------
['<a>', 'ceci est un test', '</a>']
----------
['<a>', 'esta es una prueba', '</a>']
----------

所以直接受到下面 Jeff Foster 的回答的启发,这里尝试解决我的问题并做与我的 python 代码相同的事情:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

/**
 * Callback that receives each block of lines collected by
 * {@link ThatReadsLargeFiles#readAHugeFile}.
 */
interface WorkerThing {
    /**
     * Handles one collected block.
     *
     * @param acc the lines of the block, from the start marker line through
     *            the end marker line inclusive
     */
    void doSomething(List<String> acc);
}

/**
 * Reads a line-oriented input and passes every block found between a start
 * marker line and an end marker line to a {@link WorkerThing}.
 */
class ThatReadsLargeFiles {
    /**
     * Scans {@code input} line by line. Collection starts at a line equal to
     * {@code start} and ends at a line equal to {@code end}; markers are
     * compared with {@link String#equals}, so a line must be exactly the
     * marker (no trimming, no prefix matching). Each non-empty block is passed
     * to {@code action} and a fresh list is allocated for the next block, so
     * the callback may keep the list it receives.
     *
     * <p>The reader is always closed before this method returns, including
     * when reading or the callback throws.
     *
     * @param input  source of lines; closed by this method
     * @param start  line that opens a block
     * @param end    line that closes a block
     * @param action callback invoked once per collected block
     * @throws IOException if reading from or closing {@code input} fails
     */
    public void readAHugeFile(BufferedReader input, String start, String end, WorkerThing action) throws IOException {
        try {
            List<String> acc = new ArrayList<String>();
            boolean inside = false;
            String line;
            while ((line = input.readLine()) != null) {
                if (line.equals(start)) {
                    inside = true;
                }
                if (inside) {
                    acc.add(line);
                }
                if (line.equals(end)) {
                    // A stray end marker with nothing collected is ignored.
                    if (!acc.isEmpty()) {
                        // Here control is handed to the caller's code (the "yield").
                        action.doSomething(acc);
                        // New list rather than clear(): the callback may retain the old one.
                        acc = new ArrayList<String>();
                    }
                    inside = false;
                }
            }
        } finally {
            // Release the reader even if readLine() or the callback threw.
            input.close();
        }
    }
}

/**
 * Demo driver: prints every block of lines found between {@code <a>} and
 * {@code </a>} in a fixed test file.
 */
public class YieldLikeTest {

    /**
     * Opens {@code c:/test.acc.txt} with the "UTF8" charset and hands it to
     * {@link ThatReadsLargeFiles#readAHugeFile} together with a callback that
     * prints each collected block.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        FileInputStream rawBytes = new FileInputStream(new File("c:/test.acc.txt"));
        BufferedReader reader = new BufferedReader(new InputStreamReader(rawBytes, "UTF8"));

        // Callback that writes the list representation of each block to standard output.
        WorkerThing printer = new WorkerThing() {
            public void doSomething(List<String> acc) {
                System.out.println(acc.toString());
            }
        };

        new ThatReadsLargeFiles().readAHugeFile(reader, "<a>", "</a>", printer);
    }

}

第二次编辑:我接受这个答案太快了。实际上,我仍然遗漏了一些东西,并且有一处没有弄明白:我不知道如何在最上层(而不是在匿名类内部)获取并保存 acc 的内容,这样调用方就可以使用它,而不只是打印,例如用它实例化一个类,或进行其他处理……yield 允许这种用法,但我不知道如何调整建议的答案以获得这种行为。抱歉,我的 Python 用法/示例太简单了。

所以这是来自 Jeff Foster 对记忆 acc 的解释的答案:

class betweenWorker implements WorkerThing {

    private List<String> acc;

    public void process(List<String> acc) {
        this.acc = acc;
    }
    public List<String> getAcc() { return this.acc; }
}
4

2 回答 2

1

Java 不支持类似 yield 的东西,但是您可以创建一个接口,用它封装您要对每一行执行的操作,从而实现相同的功能。

/** Callback that is handed one line of text at a time. */
interface WorkerThing {
  /**
   * Processes a single line.
   *
   * @param lineOfText the line to handle
   */
  void doSomething(String lineOfText);
}

class ThatReadsLargeFiles {
    public void readAHugeFile(WorkerThing actions) {
        // TODO write some code to read through the file and store it in line

        // Here you are yielding control to something else
        action.doSomething(line);
    }
 }

当你使用它时,你可以使用匿名接口实现来让事情变得更容易忍受。

new ThatReadsLargeFiles().readAHugeFile(new WorkerThing() {
    void doSomething(string text) {
        System.out.println(text); 
    }
};
于 2012-06-20T13:31:46.973 回答
1

如果我正确理解了 Python 代码和您的任务,则可以通过以下方式完成:

// Callback invoked with the list of lines that make up one collected block.
public interface Callback {
    // lines: the lines of a single block, in the order they were read
    void process(List<String> lines);
}

/**
 * Reads the file at {@code path} line by line and passes each block of lines
 * to {@code callback}. A block begins at a line that starts with
 * {@code start} and ends at a line that starts with {@code end}; both marker
 * lines are included in the block.
 *
 * <p>An end marker seen while nothing has been collected is ignored, so the
 * callback never receives an empty list. I/O errors are not propagated: their
 * stack trace is printed and the method returns.
 *
 * <p>NOTE(review): {@code FileReader} decodes with the platform default
 * charset; confirm that matches the encoding of the input file.
 *
 * @param path     path of the file to read
 * @param start    prefix of the line that opens a block
 * @param end      prefix of the line that closes a block
 * @param callback receiver of each collected block
 */
public void processFile(final String path, final String start, final String end, final Callback callback) {
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new FileReader(path));

        List<String> lines = new ArrayList<String>();
        boolean inside = false;

        String line;
        while ((line = reader.readLine()) != null) {
            if (line.startsWith(start)) {
                inside = true;
            }

            if (inside) {
                lines.add(line);
            }

            if (line.startsWith(end)) {
                inside = false;
                // Skip stray end markers: only hand over blocks that contain lines.
                if (!lines.isEmpty()) {
                    callback.process(lines);
                    // Fresh list so the callback may keep the one it was given.
                    lines = new ArrayList<String>();
                }
            }
        }
    } catch (final IOException ex) {
        ex.printStackTrace();
    } finally {
        // Close the reader whether or not reading succeeded.
        if (reader != null) {
            try {
                reader.close();
            } catch (final IOException e) {
                e.printStackTrace();
            }
        }
    }
}

然后在你的代码中的某个地方你可以调用这个方法,如下所示:

// ...
// Anonymous Callback implementation that prints every line of a block on its own line.
final Callback myCallback = new Callback() {
    public void process(List<String> lines) {
        final int count = lines.size();
        for (int i = 0; i < count; i++) {
            System.out.println(lines.get(i));
        }
    }
};
// The same callback is reused for both tag pairs.
processFile("path/to/file", "<c>", "</c>", myCallback);
processFile("path/to/file", "<a>", "</a>", myCallback);
// ...

希望这可以帮助 ...

于 2012-06-20T13:50:16.480 回答