0

这是一个代码片段。它给出了arrayindexoutofboundexception。不知道为什么?

import java.io.File;
import java.io.FileInputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xslf.usermodel.XSLFTextParagraph;


public class wordcount 
{
public static void main(String[] args) throws Exception
{


        File file = new File("E:\\myFiles\\abc.doc");
        FileInputStream fis=new FileInputStream(file.getAbsolutePath());
        HWPFDocument document=new HWPFDocument(fis);
        WordExtractor extractor = new WordExtractor(document);
        String [] fileData = extractor.getParagraphText();

        for (int i = 0; i < fileData.length; i++)
            {  

               // System.out.println(fileData[i].toString());
                String[] paraword = fileData[i].toString().split(" ");
               // out.println(paraword.length);

                if(paraword[i].length() == 0 )
                {
                    System.out.println("\n");
                }
                else if(paraword[i].length() > 0 && paraword[i].length() < 12)
                {
                    for(int k=0 ; k < paraword[i].length()-1 ; k++)
                    {
                       System.out.println(paraword[k].toString());
                    }
                }
                else if(paraword[i].length() >= 12  )
                {
                    for(int k=0 ; k < 12 ; k++)
                    {
                       System.out.println(paraword[k].toString());
                    }
                }


               System.out.println("\n");

            }

}
}

这是 abc.doc 文件的图像

这是 abc.doc

注意:预期的输出将打印在 java 控制台上。

并且输出将在每行中包含 12 个单词。但是在执行第一行之后,就会发生错误。

任何帮助,将不胜感激

TIA

4

2 回答 2

1

老实说,我不熟悉 apache.org API,但仅通过查看您的逻辑,您似乎想要替换以下每个实例:

paraword[i].length()

和:

paraword.length

因为看起来您想检查段落中有多少单词,而不是段落的第一个单词有多长。如果我错了,请纠正我,但我认为这会解决你的问题。

于 2012-11-15T22:51:49.510 回答
0

这是正确的代码片段

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;

public class ExtractWordDocument
{
public String myString() throws IOException
  {
      File file = new File("PATH FOR THE .doc FILE");
        FileInputStream fis=new FileInputStream(file.getAbsolutePath());
        HWPFDocument document=new HWPFDocument(fis);
        WordExtractor extractor = new WordExtractor(document);
        String [] fileData = extractor.getParagraphText();

        ArrayList<Object> EntireDoc = new ArrayList<>();

        for (int i = 0; i < fileData.length; i++)
            {  
                String[] paraword = fileData[i].toString().split("\\s+");
                if(paraword.length == 0 )
                    {EntireDoc.add("\n");}
                else if(paraword.length > 0 && paraword.length < 12)
                    {
                    for(int k=0 ; k < paraword.length ; k++)
                        {EntireDoc.add(paraword[k].toString()+" ");}
                    }
                else if(paraword.length > 12  )
                    {
                    java.util.List<String> arrAsList = Arrays.asList(paraword);
                    String formatedString = arrAsList.toString()
                                             .replace(",", "")  //remove the commas
                                             .replace("[", "")   //remove the right bracket
                                             .replace("]", ""); //remove the left bracket

                     StringBuilder sb = new StringBuilder(formatedString);
                     int i1 = 0;
                     while ((i1 = sb.indexOf(" ", i1 + 75)) != -1)
                        {sb.replace(i1, i1 + 1, "\n");}
                     EntireDoc.add(sb.toString());
                    }

               EntireDoc.add("\n");
             }

            String formatedString = EntireDoc.toString()
                                             .replace(",", "")  //remove the commas
                                             .replace("[", "")   //remove the right bracket
                                             .replace("]", ""); //remove the left bracket

        return formatedString;
  }

public static void main(String[] args)
{
    try{
    System.out.print(new ExtractWordDocument().myString());
    }
    catch(IOException ioe){System.out.print(ioe);}

}

}

注意:此代码不会在每行打印 12 个单词,而是在每行打印 75 个字符。

于 2012-11-17T16:48:49.343 回答