对于那些寻求这个问题答案的人:
您应该对 Unicode 字符进行一些操作。您通常知道的每个波斯字符(例如 س ش ت ظ)实际上有 4 种不同的形式,每种形式都有自己的 Unicode 字符。
以 سا 这个词为例。
在这种情况下,位于词首的 س 有一个 Unicode 字符,它与 راس 中位于词尾的 س 所对应的 Unicode 字符不同。
为了更好地理解,请看下图

如何获得每种形式的 Unicode 字符?
只需访问此网站 https://www.compart.com/en/unicode/ 并搜索您要查询的字符。
请注意,阿拉伯语和波斯语使用几乎相同的文字(书写系统),这就是为什么在我上传的图片中,س 字符被标注为“Arabic Letter Seen Isolated Form”(阿拉伯字母 Seen 的独立形式)。
这是一个可用于计算 4 种不同形式的波斯字符的类:
/**
 * Looks up the Unicode presentation-form characters (initial, medial, final
 * and isolated) of a single Persian letter.
 *
 * <p>Usage: call {@link #setCharc(char)} with a base letter, then read the
 * four forms through the getters. Each getter returns a one-character string.
 * A letter that does not have a given form (for example, letters that never
 * join to the following letter have no initial or medial form) reports the
 * string {@code "\0"} for that form. If the character passed to
 * {@link #setCharc(char)} is not one of the supported letters, all four
 * getters return {@code null}, exactly as they do before any character is set.
 */
public class PersianCharachtersUnicode {
    /** Placeholder stored when a letter has no glyph for a given position. */
    private static final String NO_FORM = "\0";

    char c;
    private String InitialFom_Unicode;
    private String MedialForm_Unicode;
    private String FinalForm_Unicode;
    private String IsolatedForm_Unicode;

    /**
     * Selects the letter whose presentation forms should be reported and
     * recomputes all four forms immediately.
     *
     * @param c the base Persian letter to look up
     */
    public void setCharc (char c) {
        this.c = c;
        calculate();
    }

    /**
     * Stores the four presentation forms for the current letter.
     *
     * @param initial  form used at the start of a joined sequence
     * @param medial   form used in the middle of a joined sequence
     * @param fin      form used at the end of a joined sequence
     * @param isolated form used when the letter stands alone
     */
    private void assign(String initial, String medial, String fin, String isolated) {
        InitialFom_Unicode = initial;
        MedialForm_Unicode = medial;
        FinalForm_Unicode = fin;
        IsolatedForm_Unicode = isolated;
    }

    /** Maps the current letter {@code c} to its presentation-form code points. */
    private void calculate() {
        switch (c) {
            case 'آ':
                // Alef with Madda has a final form (U+FE82), used e.g. in the middle of a word
                // after a joining letter.
                assign(NO_FORM, NO_FORM, "\uFE82", "\uFE81");
                break;
            case 'ا':
                assign(NO_FORM, NO_FORM, "\uFE8E", "\uFE8D");
                break;
            case 'ب':
                assign("\uFE91", "\uFE92", "\uFE90", "\uFE8F");
                break;
            case 'پ':
                assign("\uFB58", "\uFB59", "\uFB57", "\uFB56");
                break;
            case 'ت':
                assign("\uFE97", "\uFE98", "\uFE96", "\uFE95");
                break;
            case 'ث':
                assign("\uFE9B", "\uFE9C", "\uFE9A", "\uFE99");
                break;
            case 'ج':
                assign("\uFE9F", "\uFEA0", "\uFE9E", "\uFE9D");
                break;
            case 'چ':
                // Tcheh occupies U+FB7A..U+FB7D; its final form is U+FB7B
                // (U+FE9B is the initial form of Theh, not a Tcheh glyph).
                assign("\uFB7C", "\uFB7D", "\uFB7B", "\uFB7A");
                break;
            case 'ح':
                assign("\uFEA3", "\uFEA4", "\uFEA2", "\uFEA1");
                break;
            case 'خ':
                assign("\uFEA7", "\uFEA8", "\uFEA6", "\uFEA5");
                break;
            case 'د':
                assign(NO_FORM, NO_FORM, "\uFEAA", "\uFEA9");
                break;
            case 'ذ':
                assign(NO_FORM, NO_FORM, "\uFEAC", "\uFEAB");
                break;
            case 'ر':
                assign(NO_FORM, NO_FORM, "\uFEAE", "\uFEAD");
                break;
            case 'ز':
                assign(NO_FORM, NO_FORM, "\uFEB0", "\uFEAF");
                break;
            case 'ژ':
                assign(NO_FORM, NO_FORM, "\uFB8B", "\uFB8A");
                break;
            case 'س':
                assign("\uFEB3", "\uFEB4", "\uFEB2", "\uFEB1");
                break;
            case 'ش':
                assign("\uFEB7", "\uFEB8", "\uFEB6", "\uFEB5");
                break;
            case 'ص':
                assign("\uFEBB", "\uFEBC", "\uFEBA", "\uFEB9");
                break;
            case 'ض':
                assign("\uFEBF", "\uFEC0", "\uFEBE", "\uFEBD");
                break;
            case 'ط':
                assign("\uFEC3", "\uFEC4", "\uFEC2", "\uFEC1");
                break;
            case 'ظ':
                assign("\uFEC7", "\uFEC8", "\uFEC6", "\uFEC5");
                break;
            case 'ع':
                assign("\uFECB", "\uFECC", "\uFECA", "\uFEC9");
                break;
            case 'غ':
                assign("\uFECF", "\uFED0", "\uFECE", "\uFECD");
                break;
            case 'ف':
                assign("\uFED3", "\uFED4", "\uFED2", "\uFED1");
                break;
            case 'ق':
                assign("\uFED7", "\uFED8", "\uFED6", "\uFED5");
                break;
            case 'ک':
                assign("\uFB90", "\uFB91", "\uFB8F", "\uFB8E");
                break;
            case 'گ':
                assign("\uFB94", "\uFB95", "\uFB93", "\uFB92");
                break;
            case 'ل':
                assign("\uFEDF", "\uFEE0", "\uFEDE", "\uFEDD");
                break;
            case 'م':
                assign("\uFEE3", "\uFEE4", "\uFEE2", "\uFEE1");
                break;
            case 'ن':
                assign("\uFEE7", "\uFEE8", "\uFEE6", "\uFEE5");
                break;
            case 'و':
                assign(NO_FORM, NO_FORM, "\uFEEE", "\uFEED");
                break;
            case 'ه':
                assign("\uFEEB", "\uFEEC", "\uFEEA", "\uFEE9");
                break;
            case 'ی':
                assign("\uFBFE", "\uFBFF", "\uFBFD", "\uFBFC");
                break;
            default:
                // Unsupported character: clear the fields so the getters do not
                // keep reporting the forms of a previously set letter.
                assign(null, null, null, null);
                break;
        }
    }

    /**
     * @return the initial form of the current letter, {@code "\0"} if the
     *         letter has none, or {@code null} if no supported letter is set
     */
    public String getInitialFom_Unicode() {
        return InitialFom_Unicode;
    }

    /**
     * @return the final form of the current letter, {@code "\0"} if the
     *         letter has none, or {@code null} if no supported letter is set
     */
    public String getFinalForm_Unicode() {
        return FinalForm_Unicode;
    }

    /**
     * @return the isolated form of the current letter, or {@code null} if no
     *         supported letter is set
     */
    public String getIsolatedForm_Unicode() {
        return IsolatedForm_Unicode;
    }

    /**
     * @return the medial form of the current letter, {@code "\0"} if the
     *         letter has none, or {@code null} if no supported letter is set
     */
    public String getMedialForm_Unicode() {
        return MedialForm_Unicode;
    }
}