-3
   import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class Test {

    List<String> knownWordsArrayList = new ArrayList<String>();
    List<String> wordsArrayList = new ArrayList<String>();
    List<String> newWordsArrayList = new ArrayList<String>();
    String toFile = "";

    public void readKnownWordsFile() {
        try {
            FileInputStream fstream2 = new FileInputStream("knownWords.txt");

            BufferedReader br2 = new BufferedReader(new InputStreamReader(fstream2, "UTF-8"));
            String strLine;
            while ((strLine = br2.readLine()) != null) {
                knownWordsArrayList.add(strLine.toLowerCase());
            }
            HashSet h = new HashSet(knownWordsArrayList);
            // h.removeAll(knownWordsArrayList);
            knownWordsArrayList = new ArrayList<String>(h);
            // for (int i = 0; i < knownWordsArrayList.size(); i++) {
            // System.out.println(knownWordsArrayList.get(i));
            // }
        } catch (Exception e) {
            // TODO: handle exception
        }

    }

    public void readFile() {
        try {
            // Open the file that is the first
            // command line parameter
            FileInputStream fstream = new FileInputStream("Smallville 4x02.de.srt");

            BufferedReader br = new BufferedReader(new InputStreamReader(fstream));

            String strLine;

            String numberedLineRemoved = "";
            String strippedInput = "";
            String[] words;
            String trimmedString = "";
            String temp = "";
            // Read File Line By Line
            while ((strLine = br.readLine()) != null) {
                temp = strLine.toLowerCase();
                // Print the content on the console
                numberedLineRemoved = numberedLine(temp);
                strippedInput = numberedLineRemoved.replaceAll("\\p{Punct}", "");
                if ((strippedInput.trim().length() != 0) || (!strippedInput.contains("")) || (strippedInput.contains(" "))) {
                    words = strippedInput.split("\\s+");
                    for (int i = 0; i < words.length; i++) {
                        if (words[i].trim().length() != 0) {
                            wordsArrayList.add(words[i]);
                        }
                    }
                }
            }

            HashSet h = new HashSet(wordsArrayList);
            h.removeAll(knownWordsArrayList);
            newWordsArrayList = new ArrayList<String>(h);

            // HashSet h = new HashSet(wordsArrayList);
            // wordsArrayList.clear();
            // newWordsArrayList.addAll(h);

            for (int i = 0; i < newWordsArrayList.size(); i++) {
                toFile = newWordsArrayList.get(i) + ".\n";
//              System.out.println(newWordsArrayList.get(i) + ".");
                System.out.println();
            }

            System.out.println(newWordsArrayList.size());
            // Close the input stream
            in.close();
        } catch (Exception e) {// Catch exception if any
            System.err.println("Error: " + e.getMessage());
        }
    }

    public String numberedLine(String string) {
        if (string.matches(".*\\d.*")) {
            return "";
        } else {
            return string;
        }
    }

    public void writeToFile() {
        try {
            // Create file
            FileWriter fstream = new FileWriter("out.txt");
            BufferedWriter out = new BufferedWriter(fstream);
            out.write(toFile);
            // Close the output stream
            out.close();
        } catch (Exception e) {// Catch exception if any
            System.err.println("Error: " + e.getMessage());
        }
    }

    public static void main(String[] args) {
        Test test = new Test();
        test.readKnownWordsFile();
        test.readFile();
        test.writeToFile();

    }

}

如何从文件中读取äöüß?string.toLowercase() 也会正确处理这些吗?当我去打印包含任何äöüß的单词时,我怎样才能正确打印这个单词?当我打印到控制台时,我得到 Außerdem weiß for Außerdem weiß 我该如何解决这个问题?

我试过:

BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));

但现在我得到了 aufkl?ren 而不是 aufklären 并且它在其他地方也搞砸了。

更新了代码以查看它是否会正确打印在文件上,但我只是在文件中得到一个。

4

1 回答 1

1

您需要使用用于创建文件的字符集来读取文件。如果您在 Windows 机器上,那可能是 cp1252。所以:

BufferedReader br = new BufferedReader(new InputStreamReader(in, "Cp1252"));

如果这不起作用,大多数文本编辑器都能够告诉您给定文档使用什么编码。

于 2013-04-12T19:36:00.067 回答