1
public static void CountWordFrequency(ArrayList<String> UserString) {
        //creating an array list to store every word
        //each element in the UserString is one line

        ArrayList<String> words_storage = new ArrayList<String>();
        String words[]= {};
        
        for(int i=0;i<UserString.size();i++) {//this is outer loop to access every line of the ArrayList
            //we need to split the line and put them inside the array String
            words = UserString.get(i).split("\\s");
            
            //we still need to work with the "\'" , the upper case, and the dot and comma
            
            for(int j=0;j<words.length;j++) {
                
                for(int k=0;k<words[j].length();k++) {//access every character of one word
                    
                    if(Character.isUpperCase(words[j].charAt(k))) {//first I want to convert them to Lower Case first
                        words[j]=words[j].toLowerCase();
                    }
                        
                    if(!Character.isLetterOrDigit(words[j].charAt(k)) && words[j].charAt(k)!=',' && words[j].charAt(k)!= '.') {
                        //I  am separating the comma and dot situations with the ' \' '
                        //need more work on this
                        if(words[j].compareTo("can't")==0) {
                            words[j]=words[j].replace(words[j].charAt(k), '\0');
                            words[j]=words[j].replace(words[j].charAt(k+1), '\0');
                            words[j] = "can";
                            words_storage.add("not");
                        }
                        else {
                            words[j]=words[j].replace(words[j].charAt(k), '\0');
                            words_storage.add("is");
                        }
                    }
                    
                    //now if the that character is comma or dot
                    if(words[j].charAt(k)==',' ||words[j].charAt(k)=='.') {
                        words[j]=words[j].replace(words[j].charAt(k), '\0');
                    }
                        
                }//done with one-word loop
            }
            
            //now we need to store every element of the String Array inside the array list
            
            for(int j=0;j<words.length;j++) {
                words_storage.add(words[j]);
            }
            
        }//this is the end of the outer loop
         
         //since it's harder to change the content of element in array list compared to array
         //we need to store elements in another array
        
        String[] array = new String[words_storage.size()];
        
        for(int a =0;a<words_storage.size();a++) {
            array[a] = words_storage.get(a);
        }
        //now when we are done with storing elements, we need to sort alphabetically
        
        for(int a=0;a<array.length;a++) {
            
            for(int b = a+1;b<array.length;b++) {
                
                if(array[a].compareTo(array[b])>0) {
                    String temp = array[a];
                    array[a] = array[b];
                    array[b] = temp;
                }
                
            }
        }
        
        //now we count the frequency of each element in the Array array
        int marker = 0;//marker will help me skip the word that already counted in the frequency
        for(int x =0;x<array.length;x=marker) {
            
            int counter = 1;
            for(int y =x+1; y< array.length;y++) {
                
                if(array[x].compareTo(array[y])==0) {//if they have the same content then we increase the counter and mark the y
                    counter++;
                    marker = y+1;
                }
            }
            
            if(counter==1) {//if we did not find any similar word, we need to increase the marker by one to check on the next word
                marker++;
            }
            System.out.println(array[x]+":"+counter); //now just print it out
        }
        
        
    }

嘿伙计们,我正在尝试计算给定输入中的词频,它有很多行。我将它存储在 ArrayList 中并将其作为参数。首先,我尝试先按 aphabetically 对它们进行排序

现在,我正在尝试删除单词 can't 中的字符 '。但它似乎没有用。所以我尝试使用替换方法,但是当我用 '\0' 替换它时它会留下空白

希望我得到了一些解决方案。提前致谢。

4

1 回答 1

0

只需使用compareTo()compareToIgnoreCase()方法来查找单词。

于 2020-11-17T02:46:51.397 回答