0

嗨,我正在尝试在 Java 上编写一个单词计数器类。我想我正在从基本文件夹中读取带有扫描仪的文件并将它们打印到控制台。但是,文件的第一项返回前缀 ÿş 或有时 ?? 两个问号。文件中的每个项目都是字符串单词。这是我的源代码,我无法处理这个,所以请任何帮助将不胜感激,谢谢......(顺便说一句,我使用的是 JCreator LE 4.5)

import java.io.*;
import java.util.*;

public class WordCounter implements Comparator<Integer>{

    public static Scanner myScanner;
    private static int orderNumber = 0;
    private static String inputName = "";
    private static String outputName = "";
    private static LinkedHashMap<String, Integer> dictionary = new LinkedHashMap<String, Integer>();
    private static ArrayList<String> words = new ArrayList<String>();
    private static SortedSet<String> keys;
    private static Scanner in;

    public static void main(String[] args) {
        myScanner = new Scanner(System.in);
        System.out.println("Please enter a file name to read...");
        inputName = myScanner.nextLine();
        System.out.println("Please enter a file name to write in...");
        outputName = myScanner.nextLine();
        askForOptions();

        readFromFile(inputName);
        writeToFile(outputName, orderNumber);
    }

    private static void readFromFile(String fileName){
        try {
            in = new Scanner(new File(fileName+".txt"));
            while(in.hasNext()){
                String lowered = in.next().toLowerCase();
                if(!lowered.equals(" ") || !lowered.equals("") || !lowered.equals(null)){
                    System.out.println(lowered);
                    int lastInd = lowered.length()-1;
                    char lastChar = lowered.charAt((lastInd-1));
                    System.out.println(lastChar);
                    if (lastChar == '?' || lastChar == ',' || lastChar == '.'){
                        String newLowered = lowered.substring(0, (lastInd-1));
                        words.add(newLowered);
                    }else{
                        words.add(lowered);
                    }
                }
            }
            in.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    private static void writeToFile(String fileName, int orderNumber){
        for(String word: words) {
            if(dictionary.containsKey(word)){
                int val = (int) dictionary.get(word);
                dictionary.put(word, val+1);
            }else{
                dictionary.put(word, 1);
            }
        }

        if(orderNumber == 1){
             keys = new TreeSet<String>(dictionary.keySet());
             try {
                    FileWriter writer = new FileWriter(fileName+".txt");
                    for(String key:keys){
                        writer.write(key + "\t" + dictionary.get(key) + "\n");
                    }
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
        }else if(orderNumber == 2){
            Comparator<Integer> comp = new Comparator<Integer>() ;
            TreeMap<Integer, String> wordsMap = new TreeMap<Integer, String>(comp);
            for(Map.Entry<String, Integer> entry:dictionary.entrySet()){
                wordsMap.put(entry.getValue(),entry.getKey());
            }
            try {
                FileWriter writer = new FileWriter(fileName+".txt");
                for(Map.Entry<Integer, String> entry: wordsMap.entrySet()){
                    writer.write(entry.getValue() + "\t" + entry.getKey() + "\n");
                }
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    private static void askForOptions(){
        System.out.println("How do you want the result? \nPress 1 to get result in alphabethic order, \nPress 2 to in frequency order.");
        int option = myScanner.nextInt();
        if (option == 1){
            orderNumber = 1;
            System.out.println("Thank you! Good luck...");
        }else if(option == 2){
            orderNumber = 2;
            System.out.println("Thank you! Good luck...");
        }else{
            System.out.println("Invalid choice! Plese try again...");
            askForOptions();
        }
    }

    @Override
    public int compare(Integer arg0, Integer arg1) {
        if (arg0 == arg1) return 0;
        if (arg0 > arg1) return 1;
        if (arg0 < arg1) return -1;
        return 0;
    }

}
4

1 回答 1

0

ÿş是使用Windows 1254 codepage打印的UTF-16 BOM 字节FF FE,我相信这是您的系统默认值。

要正确读取文件,您需要跳过 BOM,这可以使用Apache Commons IO BOMInputStream包装器完成:

try (BOMInputStream bis = new BOMInputStream(new FileInputStream(filename));
     Scanner in = new Scanner(bis, bis.getBOMCharsetName() == null 
                                   ? Charset.defaultCharset().name() 
                                   : bis.getBOMCharsetName())) {
     // read lines

} catch (IOException e) {
     // ...
}

或者您可以按照本文答案中的说明手动跳过这 2 个字节

于 2015-10-21T10:08:09.463 回答