我有一个问题:经过词干提取(stemming)之后,价格中的符号($、逗号、小数点等)都丢失了。例如,输入字符串 words = "44,66$"(一个价格),词干提取之后的输出却是 4466。对普通单词来说,词干提取的效果非常好,但我希望数字(价格)保持不变,因为后续处理需要用到原始价格。我的问题是:怎样才能在运行这段词干提取代码之后,仍然得到带符号的价格(例如 44.33 $)?
////// 以下是全部代码
/**
 * Mutable string holder that lets a callee hand a string back to its caller
 * through a parameter.
 *
 * <p>The class name matches its constructor and the name used at every call
 * site ({@code NewString}); the previous declaration ({@code NewStrings})
 * did not compile because {@code NewString()} was then parsed as a method
 * without a return type.
 */
class NewString {
    /** Current value; starts out as the empty string. */
    public String str;

    /** Creates a holder containing the empty string. */
    NewString() {
        str = "";
    }
}
/**
 * Porter-style suffix/prefix stripper for single English tokens that leaves
 * price-like tokens (for example {@code 44,66$} or {@code 3.4$}) untouched.
 *
 * <p>Entry point is {@link #stripAffixes(String)}. A token that consists only
 * of digits, currency signs, {@code ,} and {@code .} is returned as written;
 * every other token is lower-cased, reduced to letters and digits, and then
 * run through prefix removal and the five suffix steps.
 */
public class Stemmer {

    /**
     * Characters kept by {@link #step6(String)} and accepted in a price token:
     * dollar, euro (U+20AC), yen (U+00A5), pound (U+00A3), the ASCII digits,
     * comma and full stop. Unicode escapes keep the source encoding-independent.
     */
    static final String allowedChrs = "$\u20AC\u00A5\u00A30123456789,.";

    /** Prefixes removed (first match only) before suffix stripping. */
    private static final String[] PREFIXES = {
        "kilo", "micro", "milli", "intra", "ultra", "mega", "nano", "pico", "pseudo"
    };

    /** Step 2 rules: {suffix, replacement}, applied when measure(stem) > 0. */
    private static final String[][] STEP2_RULES = {
        { "ational", "ate" }, { "tional", "tion" }, { "enci", "ence" }, { "anci", "ance" },
        { "izer", "ize" }, { "iser", "ize" }, { "abli", "able" }, { "alli", "al" },
        { "entli", "ent" }, { "eli", "e" }, { "ousli", "ous" }, { "ization", "ize" },
        { "isation", "ize" }, { "ation", "ate" }, { "ator", "ate" }, { "alism", "al" },
        { "iveness", "ive" }, { "fulness", "ful" }, { "ousness", "ous" }, { "aliti", "al" },
        { "iviti", "ive" }, { "biliti", "ble" }
    };

    /** Step 3 rules: {suffix, replacement}, applied when measure(stem) > 0. */
    private static final String[][] STEP3_RULES = {
        { "icate", "ic" }, { "ative", "" }, { "alize", "al" }, { "alise", "al" },
        { "iciti", "ic" }, { "ical", "ic" }, { "ful", "" }, { "ness", "" }
    };

    /** Step 4 rules: suffix is dropped entirely when measure(stem) > 1. */
    private static final String[][] STEP4_RULES = {
        { "al", "" }, { "ance", "" }, { "ence", "" }, { "er", "" }, { "ic", "" },
        { "able", "" }, { "ible", "" }, { "ant", "" }, { "ement", "" }, { "ment", "" },
        { "ent", "" }, { "sion", "" }, { "tion", "" }, { "ou", "" }, { "ism", "" },
        { "ate", "" }, { "iti", "" }, { "ous", "" }, { "ive", "" }, { "ize", "" },
        { "ise", "" }
    };

    /**
     * Removes every character that is not a letter or a digit.
     *
     * @param str text to filter; may be empty
     * @return {@code str} with only its letters and digits, in order
     */
    private static String clean(String str) {
        StringBuilder kept = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Splits a suffix off a word.
     *
     * @return the part of {@code word} before {@code suffix} when the word is
     *         strictly longer than the suffix and ends with it; {@code null}
     *         otherwise
     */
    private static String stemFor(String word, String suffix) {
        if (word.length() <= suffix.length() || !word.endsWith(suffix)) {
            return null;
        }
        return word.substring(0, word.length() - suffix.length());
    }

    /** Returns {@code str} without its last character. */
    private static String dropLast(String str) {
        return str.substring(0, str.length() - 1);
    }

    /**
     * Tells whether {@code ch} counts as a vowel given the preceding character.
     * {@code a e i o u} always do; {@code y} does unless {@code prev} is one of
     * {@code a e i o u}.
     */
    private static boolean vowel(char ch, char prev) {
        switch (ch) {
            case 'a': case 'e': case 'i': case 'o': case 'u':
                return true;
            case 'y':
                switch (prev) {
                    case 'a': case 'e': case 'i': case 'o': case 'u':
                        return false;
                    default:
                        return true;
                }
            default:
                return false;
        }
    }

    /**
     * Counts vowel-run/consonant transitions in {@code stem} (the Porter
     * "measure"): each vowel run that is followed by a consonant adds one.
     */
    private static int measure(String stem) {
        int length = stem.length();
        int count = 0;
        int i = 0;
        while (i < length) {
            // Advance to the next vowel; a leading 'y' is treated as a consonant
            // because the virtual predecessor of index 0 is 'a'.
            for (; i < length; i++) {
                char prev = (i > 0) ? stem.charAt(i - 1) : 'a';
                if (vowel(stem.charAt(i), prev)) {
                    break;
                }
            }
            // Advance past the vowel run; i is at least 1 inside this loop.
            for (i++; i < length; i++) {
                if (!vowel(stem.charAt(i), stem.charAt(i - 1))) {
                    break;
                }
            }
            if (i < length) {
                count++;
                i++;
            }
        }
        return count;
    }

    /** Tells whether {@code word} has at least one vowel (leading 'y' excluded). */
    private static boolean containsVowel(String word) {
        for (int i = 0; i < word.length(); i++) {
            char prev = (i > 0) ? word.charAt(i - 1) : 'a';
            if (vowel(word.charAt(i), prev)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether {@code str} ends consonant-vowel-consonant where the final
     * consonant is not {@code w}, {@code x} or {@code y}.
     */
    private static boolean cvc(String str) {
        int length = str.length();
        if (length < 3) {
            return false;
        }
        char last = str.charAt(length - 1);
        boolean endsVowelConsonant = !vowel(last, str.charAt(length - 2))
                && last != 'w' && last != 'x' && last != 'y'
                && vowel(str.charAt(length - 2), str.charAt(length - 3));
        if (!endsVowelConsonant) {
            return false;
        }
        // For a three-letter word the first letter has no predecessor; '?' makes
        // a leading 'y' count as a vowel here.
        char beforeFirst = (length == 3) ? '?' : str.charAt(length - 4);
        return !vowel(str.charAt(length - 3), beforeFirst);
    }

    /**
     * Step 1: plurals ({@code sses}, {@code ies}, {@code s}), then
     * {@code eed}/{@code ed}/{@code ing}, then a final {@code y} to {@code i}.
     *
     * @param str non-empty lower-case word
     */
    private String step1(String str) {
        // 1a: plural endings.
        if (str.charAt(str.length() - 1) == 's') {
            if (stemFor(str, "sses") != null || stemFor(str, "ies") != null) {
                str = str.substring(0, str.length() - 2);
            } else {
                if (str.length() == 1) {
                    return "";
                }
                if (str.charAt(str.length() - 2) != 's') {
                    str = dropLast(str);
                }
            }
        }

        // 1b: past tense / gerund endings.
        String stem = stemFor(str, "eed");
        if (stem != null) {
            if (measure(stem) > 0) {
                str = dropLast(str);
            }
        } else {
            stem = stemFor(str, "ed");
            if (stem == null) {
                stem = stemFor(str, "ing");
            }
            if (stem != null && containsVowel(stem)) {
                str = stem;
                if (str.length() == 1) {
                    return str;
                }
                if (stemFor(str, "at") != null || stemFor(str, "bl") != null
                        || stemFor(str, "iz") != null) {
                    str += "e";
                } else {
                    int length = str.length();
                    char last = str.charAt(length - 1);
                    if (last == str.charAt(length - 2)
                            && last != 'l' && last != 's' && last != 'z') {
                        // Undouble the final consonant (hopp -> hop).
                        str = dropLast(str);
                    } else if (measure(str) == 1 && cvc(str)) {
                        // Restore the silent e (hop -> hope).
                        str += "e";
                    }
                }
            }
        }

        // 1c: terminal y becomes i when the stem has a vowel.
        stem = stemFor(str, "y");
        if (stem != null && containsVowel(stem)) {
            str = stem + "i";
        }
        return str;
    }

    /**
     * Applies the first rule whose suffix matches and whose stem has a measure
     * greater than {@code measureAbove}; later rules are still tried when an
     * earlier suffix matches but its stem is too short.
     *
     * @param rules {suffix, replacement} pairs in priority order
     * @return the rewritten word, or {@code str} when no rule applies
     */
    private static String applySuffixRules(String str, String[][] rules, int measureAbove) {
        for (String[] rule : rules) {
            String stem = stemFor(str, rule[0]);
            if (stem != null && measure(stem) > measureAbove) {
                return stem + rule[1];
            }
        }
        return str;
    }

    /** Step 2: maps double suffixes to single ones (ational -> ate, ...). */
    private String step2(String str) {
        return applySuffixRules(str, STEP2_RULES, 0);
    }

    /** Step 3: handles icate, ative, alize, ful, ness and similar endings. */
    private String step3(String str) {
        return applySuffixRules(str, STEP3_RULES, 0);
    }

    /** Step 4: drops residual suffixes when the stem measure exceeds 1. */
    private String step4(String str) {
        return applySuffixRules(str, STEP4_RULES, 1);
    }

    /**
     * Step 5: removes a final {@code e} and reduces a final {@code ll}.
     *
     * @param str non-empty lower-case word
     */
    private String step5(String str) {
        if (str.charAt(str.length() - 1) == 'e') {
            // measure(str) equals measure(stem) here because the word ends in a vowel.
            int m = measure(str);
            if (m > 1) {
                str = dropLast(str);
            } else if (m == 1) {
                String stem = dropLast(str);
                if (!cvc(stem)) {
                    str = stem;
                }
            }
        }
        if (str.length() == 1) {
            return str;
        }
        if (str.charAt(str.length() - 1) == 'l' && str.charAt(str.length() - 2) == 'l'
                && measure(str) > 1) {
            str = dropLast(str);
        }
        return str;
    }

    /**
     * Keeps only the characters listed in {@link #allowedChrs}; everything
     * else (letters, whitespace, other punctuation) is removed.
     *
     * <p>This is a price filter, not a stemming step: applying it to an
     * ordinary word removes the whole word.
     *
     * @param str text to filter
     * @return the allowed characters of {@code str}, in their original order
     */
    public static String step6(String str) {
        StringBuilder kept = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (allowedChrs.indexOf(c) > -1) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Tells whether {@code token} looks like a price or plain number: at least
     * one ASCII digit, and nothing but {@link #allowedChrs} and whitespace.
     */
    private static boolean isPriceToken(String token) {
        boolean sawDigit = false;
        for (int i = 0; i < token.length(); i++) {
            char c = token.charAt(i);
            if (Character.isWhitespace(c)) {
                continue;
            }
            if (allowedChrs.indexOf(c) < 0) {
                return false;
            }
            if (c >= '0' && c <= '9') {
                sawDigit = true;
            }
        }
        return sawDigit;
    }

    /** Removes the first matching entry of {@link #PREFIXES} from the front of {@code str}. */
    private String stripPrefixes(String str) {
        for (String prefix : PREFIXES) {
            if (str.startsWith(prefix)) {
                return str.substring(prefix.length());
            }
        }
        return str;
    }

    /**
     * Runs suffix steps 1 to 5 in order, stopping early once the word is empty.
     * The price filter is deliberately not part of this chain, since it would
     * erase every letter of the stem.
     *
     * @param str non-empty lower-case word
     */
    private String stripSuffixes(String str) {
        str = step1(str);
        if (str.length() >= 1) {
            str = step2(str);
        }
        if (str.length() >= 1) {
            str = step3(str);
        }
        if (str.length() >= 1) {
            str = step4(str);
        }
        if (str.length() >= 1) {
            str = step5(str);
        }
        return str;
    }

    /** Demo: stems a sample price token and prints the result. */
    public static void main(String[] args) {
        String word = "3.4$";
        Stemmer stemmer = new Stemmer();
        System.out.println(stemmer.stripAffixes(word));
    } //main

    /**
     * Stems a single token.
     *
     * <p>Price-like tokens (digits with optional currency sign, {@code ,} and
     * {@code .}) are returned trimmed but otherwise unchanged, so
     * {@code "44,66$"} stays {@code "44,66$"}. Any other token is lower-cased,
     * stripped of non-alphanumeric characters and, when longer than two
     * characters, has its prefix and suffixes removed.
     *
     * @param str token to stem; may be empty, must not be {@code null}
     * @return the preserved price or the stemmed word (possibly empty)
     */
    public String stripAffixes(String str) {
        if (isPriceToken(str)) {
            return str.trim();
        }
        // Locale.ROOT keeps "ING" -> "ing" regardless of the default locale.
        str = clean(str.toLowerCase(java.util.Locale.ROOT));
        if (str.length() > 2) {
            str = stripPrefixes(str);
            if (!str.isEmpty()) {
                str = stripSuffixes(str);
            }
        }
        return str;
    } //stripAffixes
} //class