0

我有一个表格,它是使用 jQuery 插件 DataTables 根据粘贴到 textarea 中的任意文本生成的。我想在统计时忽略数字以及大量常用词,例如 "and, or, for, the, but" 等。我该怎么做?

这是我当前的 JavaScript:

<script>
        $( document ).ready( function() {

            // On every submit, count how often each word occurs in the textarea
            // and render the result as a DataTables-enhanced table inside #result.
            $('form').submit(function(event){
                    // Stop the browser's default form submission (page reload).
                    event.preventDefault();

                    var
                            // Tokenise on any run of whitespace (spaces, tabs, newlines).
                            // match() never yields empty tokens, so consecutive spaces
                            // neither inflate the word count nor cut the counting short.
                            word_list = $('#searchtext').val().match(/\S+/g) || [],
                            nwords = word_list.length,
                            // Prototype-less object: words such as "constructor" or
                            // "toString" cannot collide with inherited properties.
                            keyword_dict = Object.create(null),
                            rows = [],
                            i, w, keyword, occ;

                    // html() replaces any previous output, so repeated submits do not pile up.
                    $('#result').html(nwords + " Total Words in Article");

                    // Single pass: strip non-word characters, then tally what is left.
                    for (i = 0; i < nwords; i++) {
                            w = word_list[i].replace(/\W/g,'');
                            if (w === '') {
                                    // The token was punctuation only (e.g. "--"); nothing to count.
                                    continue;
                            }
                            keyword_dict[w] = (keyword_dict[w] || 0) + 1;
                    }

                    // After the \W strip a keyword holds only [A-Za-z0-9_], so it can be
                    // concatenated into markup without further escaping.
                    for (keyword in keyword_dict){
                            occ = keyword_dict[keyword]*100/nwords;
                            // Number(...) turns toPrecision's exponent form ("1.0e+2") back into "100".
                            rows.push('<tr><td>'+keyword+'</td><td>'+keyword_dict[keyword]+'</td><td>'+Number(occ.toPrecision(2))+'</td></tr>');
                    }
                    console.log(keyword_dict);

                    // DataTables needs a well-formed table: a header row inside <thead>,
                    // the data rows inside <tbody>, and a proper closing </table>.
                    $('#result').append(
                            '<table>' +
                            '<thead><tr><th>unique keywords</th><th>occurance</th><th>percent of text</th></tr></thead>' +
                            '<tbody>' + rows.join('') + '</tbody>' +
                            '</table>'
                    );
                    $('#result table').dataTable();
            });
     });
</script>

这是HTML:

<!-- Article input form; the page script handles its submit event and reads #searchtext. -->
<form>
    <label for="searchtext">Article:</label><br>
    <textarea rows="25" cols="100" id="searchtext"></textarea><br>
    <!-- <input> is a void element and must not have a closing tag. -->
    <input class="btn btn-success" id="go" type="submit" value="Submit">
</form>

<!-- The page script writes the word count and the generated keyword table here. -->
<div id="result">
</div>
4

2 回答 2

1

这是一个执行此操作的函数:

/**
 * Removes common English words and standalone numbers from a text.
 *
 * Matching is case-insensitive and restricted to whole words, so "The" is
 * removed while "order" or "sandwich" are left intact. Digits that are part
 * of a word (e.g. "html5") are kept. Whitespace runs left behind by the
 * removals are collapsed to a single space and the result is trimmed.
 *
 * @param {string} text - The text to clean up.
 * @returns {string} The text without common words and standalone numbers.
 */
function cleanUpText(text) {
    //add more words separated by | inside the group here
    //\b anchors each alternative to word boundaries; the i flag ignores case
    var commonWords = /\b(?:and|or|for|the|but|etc)\b/gi;

    //remove the common words
    text = text.replace(commonWords, '');

    //remove standalone numbers only; digits embedded in a word are kept
    text = text.replace(/\b\d+\b/g, '');

    //collapse whitespace runs left by the removals and drop the outer padding
    text = text.replace(/\s+/g, ' ').replace(/^ | $/g, '');

    return text;
}

测试 :

// Sample input mixing ordinary words, common words and runs of digits.
var text='javascript 123 5656 787878 for the but and or function 56 the to and or remove 56 90009090 not 2121 needed or and content ';

// Print the string returned by cleanUpText for the sample input.
console.log(cleanUpText(text));

输出:

javascript function to remove not needed content

于 2013-11-12T10:35:27.303 回答
0

jQuery inArray函数返回项目的索引,如果没有,则返回 -1:

var commonWords = ['and','or','the'];
// $.inArray yields the index of the match, or -1 when the word is not in the list.
// The keyword is lower-cased first so the lookup ignores case.
if ($.inArray(keyword.toLowerCase(), commonWords) !== -1) {
     // it is one of common words
}
else {
     // it's not one of common words
}

编辑:我添加了 toLowerCase,这里假设您希望在比较单词时忽略大小写。

于 2013-11-11T21:16:26.743 回答