您只需使用 strtr,并传入一个由待转换字符组成的关联数组即可完成转换(映射数据可从 MSDN 获得,下面已将其转换为 PHP 数组)。请注意,在此代码中,保留(未定义)的字节值会被替换为 U+FFFD 替换字符("\xef\xbf\xbd")。
/**
 * Convert a Windows-1255 encoded byte string to UTF-8.
 *
 * Every byte in the range 0x80-0xFF is replaced by the UTF-8 sequence listed
 * for it in the table below; bytes below 0x80 are not in the table and are
 * therefore passed through unchanged. Slots holding "\xef\xbf\xbd" produce
 * U+FFFD (the Unicode replacement character).
 *
 * @param string $str Windows-1255 encoded input.
 * @return string The input re-encoded as UTF-8.
 */
function win1255ToUtf8($str) {
    // The lookup table is built on the first call and cached for later calls.
    static $tbl = null;
    if ($tbl) {
        return strtr($str, $tbl);
    }

    // Keys: the 128 single-byte strings "\x80" .. "\xff".
    $highBytes = range("\x80", "\xff");

    // Values: one UTF-8 sequence per key, in the same order (8 per row).
    $utf8 = array(
        // 0x80 - 0x87
        "\xe2\x82\xac", "\xef\xbf\xbd", "\xe2\x80\x9a", "\xc6\x92", "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1",
        // 0x88 - 0x8F
        "\xcb\x86", "\xe2\x80\xb0", "\xef\xbf\xbd", "\xe2\x80\xb9", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
        // 0x90 - 0x97
        "\xef\xbf\xbd", "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94",
        // 0x98 - 0x9F
        "\xcb\x9c", "\xe2\x84\xa2", "\xef\xbf\xbd", "\xe2\x80\xba", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
        // 0xA0 - 0xA7
        "\xc2\xa0", "\xc2\xa1", "\xc2\xa2", "\xc2\xa3", "\xe2\x82\xaa", "\xc2\xa5", "\xc2\xa6", "\xc2\xa7",
        // 0xA8 - 0xAF
        "\xc2\xa8", "\xc2\xa9", "\xc3\x97", "\xc2\xab", "\xc2\xac", "\xc2\xad", "\xc2\xae", "\xc2\xaf",
        // 0xB0 - 0xB7
        "\xc2\xb0", "\xc2\xb1", "\xc2\xb2", "\xc2\xb3", "\xc2\xb4", "\xc2\xb5", "\xc2\xb6", "\xc2\xb7",
        // 0xB8 - 0xBF
        "\xc2\xb8", "\xc2\xb9", "\xc3\xb7", "\xc2\xbb", "\xc2\xbc", "\xc2\xbd", "\xc2\xbe", "\xc2\xbf",
        // 0xC0 - 0xC7
        "\xd6\xb0", "\xd6\xb1", "\xd6\xb2", "\xd6\xb3", "\xd6\xb4", "\xd6\xb5", "\xd6\xb6", "\xd6\xb7",
        // 0xC8 - 0xCF
        "\xd6\xb8", "\xd6\xb9", "\xef\xbf\xbd", "\xd6\xbb", "\xd6\xbc", "\xd6\xbd", "\xd6\xbe", "\xd6\xbf",
        // 0xD0 - 0xD7
        "\xd7\x80", "\xd7\x81", "\xd7\x82", "\xd7\x83", "\xd7\xb0", "\xd7\xb1", "\xd7\xb2", "\xd7\xb3",
        // 0xD8 - 0xDF
        "\xd7\xb4", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xef\xbf\xbd",
        // 0xE0 - 0xE7
        "\xd7\x90", "\xd7\x91", "\xd7\x92", "\xd7\x93", "\xd7\x94", "\xd7\x95", "\xd7\x96", "\xd7\x97",
        // 0xE8 - 0xEF
        "\xd7\x98", "\xd7\x99", "\xd7\x9a", "\xd7\x9b", "\xd7\x9c", "\xd7\x9d", "\xd7\x9e", "\xd7\x9f",
        // 0xF0 - 0xF7
        "\xd7\xa0", "\xd7\xa1", "\xd7\xa2", "\xd7\xa3", "\xd7\xa4", "\xd7\xa5", "\xd7\xa6", "\xd7\xa7",
        // 0xF8 - 0xFF
        "\xd7\xa8", "\xd7\xa9", "\xd7\xaa", "\xef\xbf\xbd", "\xef\xbf\xbd", "\xe2\x80\x8e", "\xe2\x80\x8f", "\xef\xbf\xbd",
    );

    $tbl = array_combine($highBytes, $utf8);
    return strtr($str, $tbl);
}
我用这个 PHP 脚本生成了上面的代码:
function win1255ToUtf8($str) {
static $tbl = null;
if (!$tbl) {
$tbl = array_combine(range("\x80", "\xff"), array(
<?php
/**
 * Render a byte string as a double-quoted PHP string literal in which every
 * byte is written as a \xNN escape.
 *
 * @param string $str Raw bytes to encode.
 * @return string The literal, including the surrounding double quotes.
 */
function encodeString($str) {
    $hexDigits = bin2hex($str);
    // Each pair of hex digits is one byte; prefix every pair with a literal "\x".
    $escaped = preg_replace('/../', '\x$0', $hexDigits);
    return '"' . $escaped . '"';
}
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * @param int $n Code point number.
 * @return string The UTF-8 encoding produced by mb_convert_encoding().
 */
function codepointToUtf8($n) {
    // pack('V') writes the number as an unsigned 32-bit little-endian value,
    // which is then interpreted as one UTF-32LE code unit.
    $utf32le = pack('V', $n);
    return mb_convert_encoding($utf32le, 'UTF-8', 'UTF-32LE');
}
// Generator body: download the code page chart, extract every
// "XX = U+YYYY" pair from its text and print the UTF-8 replacements for
// bytes 0x80-0xFF as a wrapped, comma-separated list of escaped PHP literals.
$html = file_get_contents('http://msdn.microsoft.com/en-us/goglobal/cc305148.aspx');
if ($html === false) {
    // Without this check a failed download would silently produce a table
    // consisting of 128 U+FFFD entries.
    throw new RuntimeException('Unable to download the code page chart.');
}
$text = strip_tags($html);

// preg_match_all() yields 0 when nothing matched and false on a regex error;
// either way there is no usable data to build the table from.
if (!preg_match_all('/([0-9A-F]{2}) = U\+([0-9A-F]{4})/', $text, $matches, PREG_SET_ORDER)) {
    throw new RuntimeException('No "XX = U+YYYY" mappings found in the downloaded chart.');
}

// Pre-fill every slot with U+FFFD so bytes the chart does not list keep the
// replacement character.
$table = array_fill(0, 128, "\xef\xbf\xbd");
foreach ($matches as $match) {
    // Slot index is the byte value relative to 0x80; bytes below 0x80 are skipped.
    $input = hexdec($match[1]) - 128;
    if ($input >= 0) {
        $table[$input] = codepointToUtf8(hexdec($match[2]));
    }
}

$buf = '';
foreach ($table as $to) {
    $buf .= encodeString($to) . ', ';
}
// Only the final space is removed, so the list ends with a trailing comma.
echo wordwrap(substr($buf, 0, -1), 68, "\n "), "\n";
?>
));
}
return strtr($str, $tbl);
}