0

我之前问过这个问题(Counting distinct words with Threads)并使代码更合适。如第一个问题所述,我需要计算文件中的不同单词。

调试表明我所有的单词都正确存储和排序,但现在的问题是测试类中的无限“while”循环在读取所有单词后继续进行(调试确实有助于找出一些要点。 ..)。我现在正在一个不超过 10 个字的小文件上测试代码。

DataSet 类进行了大部分修改。

我需要一些建议如何摆脱困境。

测试看起来像这样:

package test;

import java.io.File;
import java.io.IOException;

import junit.framework.Assert;
import junit.framework.TestCase;
import main.DataSet;
import main.WordReader;

public class Test extends TestCase
{

   public void test2() throws IOException
   {
      File words = new File("resources" + File.separator + "test2.txt");

      if (!words.exists())
      {
         System.out.println("File [" + words.getAbsolutePath()
               + "] does not exist");
         Assert.fail();
      }

      WordReader wr = new WordReader(words);
      DataSet ds = new DataSet();

      String nextWord = wr.readNext();
      // This is the loop
      while (nextWord != "" && nextWord != null)
      {
         if (!ds.member(nextWord))
         {
            ds.insert(nextWord);
         }
         nextWord = wr.readNext();
      }
      wr.close();
       System.out.println(ds.toString());
       System.out.println(words.toString() + " contains " + ds.getLength()
       + " distinct words");

   }

}

这是我更新的 DataSet 类,尤其是 member() 方法,我仍然不确定,因为在某些时候我曾经得到一个 NullPointerExeption (不知道为什么......):

package main;

import sort.Sort;

public class DataSet
{

   private String[] data;
   private static final int DEFAULT_VALUE = 200;
   private int nextIndex;
   private Sort bubble;

   public DataSet(int initialCapacity)
   {
      data = new String[initialCapacity];
      nextIndex = 0;
      bubble = new Sort();
   }

   public DataSet()
   {
      this(DEFAULT_VALUE);
      nextIndex = 0;
      bubble = new Sort();
   }

   public void insert(String value)
   {
      if (nextIndex < data.length)
      {
         data[nextIndex] = value;
         nextIndex++;
         bubble.bubble_sort(data, nextIndex);
      }
      else
      {
         expandCapacity();
         insert(value);
      }
   }

   public int getLength()
   {
      return nextIndex + 1;
   }


   public boolean member(String value)
   {
      for (int i = 0; i < data.length; i++)
      {

         if (data[i] != null && nextIndex != 10)
         {
            if (data[i].equals(value))
               return true;
         }
      }
      return false;
   }

   private void expandCapacity()
   {
      String[] larger = new String[data.length * 2];
      for (int i = 0; i < data.length; i++)
      {
         data = larger;
      }
   }
}

WordReader 类没有太大变化。ArrayList 被替换为简单数组,存储方式也进行了修改:

package main;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

public class WordReader
{

   private File file;

   private String[] words;

   private int nextFreeIndex;

   private BufferedReader in;

   private int DEFAULT_SIZE = 200;

   private String word;

   public WordReader(File file) throws IOException
   {
      words = new String[DEFAULT_SIZE];
      in = new BufferedReader(new FileReader(file));
      nextFreeIndex = 0;
   }

   public void expand()
   {
      String[] newArray = new String[words.length * 2];
      // System.arraycopy(words, 0, newArray, 0, words.length);
      for (int i = 0; i < words.length; i++)
         newArray[i] = words[i];
      words = newArray;
   }

   public void read() throws IOException
   {

   }

   public String readNext() throws IOException
   {
      char nextCharacter = (char) in.read();

      while (in.ready())
      {
         while (isWhiteSpace(nextCharacter) || !isCharacter(nextCharacter))
         {
            // word = "";
            nextCharacter = (char) in.read();

            if (!in.ready())
            {
               break;
            }
         }

         word = "";
         while (isCharacter(nextCharacter))
         {
            word += nextCharacter;
            nextCharacter = (char) in.read();
         }
         storeWord(word);

         return word;
      }

      return word;
   }

   private void storeWord(String word)
   {
      if (nextFreeIndex < words.length)
      {
         words[nextFreeIndex] = word;
         nextFreeIndex++;
      }
      else
      {
         expand();
         storeWord(word);
      }

   }

   private boolean isWhiteSpace(char next)
   {
      if ((next == ' ') || (next == '\t') || (next == '\n'))
      {
         return true;
      }
      return false;
   }

   private boolean isCharacter(char next)
   {
      if ((next >= 'a') && (next <= 'z'))
      {
         return true;
      }
      if ((next >= 'A') && (next <= 'Z'))
      {
         return true;
      }
      return false;
   }

   public boolean fileExists()
   {
      return file.exists();
   }

   public boolean fileReadable()
   {
      return file.canRead();
   }

   public Object wordsLength()
   {
      return words.length;
   }

   public void close() throws IOException
   {
      in.close();
   }

   public String[] getWords()
   {
      return words;
   }

}

并且为字符串更改了冒泡排序类:

package sort;

public class Sort
{
   public void bubble_sort(String a[], int length)
   {
      for (int j = 0; j < length; j++)
      {
         for (int i = j + 1; i < length; i++)
         {
            if (a[i].compareTo(a[j]) < 0)
            {
               String t = a[j];
               a[j] = a[i];
               a[i] = t;
            }
         }
      }
   }
}
4

1 回答 1

0

我想实际阻塞的方法是WordReader.readNext(). 我的建议是你使用Scanner而不是BufferedReader,它更适合将文件解析为单词。

您的readNext()方法可以这样重做(其中 scan 是扫描仪):

public String readNext() {
    if (scan.hasNext()) {
        String word = scan.next();
        if (!word.matches("[A-Za-z]+"))
            word = "";
        storeWord(word);
        return word;
    }
    return null;
}

这将具有与您的代码相同的功能(不使用isCharacter()isWhitespace()- 正则表达式(内部matches())检查一个单词是否仅包含字符。该isWhitespace()功能是内置在next()分隔单词的方法中。添加的功能是它返回 null 时有文件中没有更多的单词。

您必须更改 Test 类中的 while-loop 才能正常工作,否则您将得到一个NullPointerException- 只需切换循环定义中的两个条件(之前总是检查 null ,否则第一个会给出 NPE 和空检查没用)。

要制作扫描仪,您可以使用 aBufferedReader作为参数或File直接使用,如下所示:

Scanner scan = new Scanner(file); 
于 2013-03-23T15:00:21.477 回答