我阅读了您对此的讨论,认为您可能需要更强大的实现,特别是考虑到您需要支持变音符号。使用单个正则表达式来解决所有问题可能看起来很诱人,但它越复杂就越难以维护或扩展。引用杰米·扎温斯基的话:
有些人在遇到问题时会想“我知道,我会使用正则表达式”。现在他们有两个问题。
由于我在本地机器上使用 iconv 时遇到问题,因此我使用了更简单的实现。
如果您的情况需要,请随意使用更复杂或更强大的东西。
我在此解决方案中使用了一个简单的正则表达式来仅获取一组字母数字字符(也称为“单词”);正则表达式中的 \p{L}\p{M} 部分
确保我们也能匹配所有多字节字符。
您可以在 IDEone 上看到此代码。
<?php
/**
 * Replaces accented Latin letters in a UTF-8 string with ASCII equivalents.
 *
 * The ligatures "æ"/"Æ" expand to "ae"/"AE"; every other mapped letter is
 * replaced by a single ASCII letter. Characters that are not in the mapping
 * are returned untouched, so the result is still valid UTF-8 (the previous
 * implementation went through utf8_decode(), which is deprecated since
 * PHP 8.2 and turned the result into ISO-8859-1, replacing anything outside
 * Latin-1 with "?").
 *
 * @param mixed $p_sSubject Value to transliterate; it is cast to string.
 *
 * @return string The subject with the mapped accented letters folded to ASCII.
 */
function stripAccents($p_sSubject) {
    // The lookup table is built on first use and reused by later calls.
    static $aMap = null;

    if ($aMap === null) {
        $sAccented = 'àáâãäåçèéêëìíîïñòóôõöøùúûüýÿÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝ';
        $sPlain    = 'aaaaaaceeeeiiiinoooooouuuuyyAAAAAACEEEEIIIINOOOOOOUUUUY';

        $aMap = array('æ' => 'ae', 'Æ' => 'AE');

        // Split on code points (not bytes) so each multi-byte letter lines up
        // with the single ASCII letter at the same position in $sPlain.
        $aAccented = preg_split('//u', $sAccented, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($aAccented as $iIndex => $sChar) {
            $aMap[$sChar] = $sPlain[$iIndex];
        }
    }

    // Array-form strtr() matches whole UTF-8 sequences and never re-scans
    // text it has already replaced.
    return strtr((string) $p_sSubject, $aMap);
}
/**
 * Wraps every token of the subject that contains the search term in <strong>.
 *
 * The subject is split into runs of letters/digits/marks and the separator
 * runs between them (separators are kept, so the text is re-assembled
 * unchanged apart from the added tags). A token is emphasised when the term
 * occurs in it case-insensitively, either in its accent-stripped form or in
 * its original form.
 *
 * NOTE: the subject is placed into the returned markup as-is; nothing here
 * escapes HTML, so callers must escape untrusted text themselves.
 *
 * @param mixed $p_sSubject    Text to search in; cast to string.
 * @param mixed $p_sSearchTerm Term to look for; cast to string.
 *
 * @return string The subject with matching tokens wrapped in <strong> tags,
 *                or the unchanged subject when the term is empty or the
 *                subject cannot be split (e.g. invalid UTF-8).
 */
function emphasiseWord($p_sSubject, $p_sSearchTerm){
    $sText = (string) $p_sSubject;
    // Cast explicitly: array keys such as '42' arrive as integers, and older
    // PHP versions treat a non-string needle as a character ordinal.
    $sTerm = (string) $p_sSearchTerm;

    // An empty needle matches at offset 0 of every token on PHP 8 (and raises
    // a warning on PHP 7), so there is nothing meaningful to emphasise.
    if ($sTerm === '') {
        return $sText;
    }

    // -1 means "no limit"; passing null here is deprecated since PHP 8.1.
    $aTokens = preg_split('#([^a-z0-9\p{L}\p{M}]+)#iu', $sText, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($aTokens === false) {
        // preg_split() failed, e.g. the subject is not valid UTF-8.
        return $sText;
    }

    foreach ($aTokens as $t_iKey => $t_sToken) {
        $bPlainHit = stripos(stripAccents($t_sToken), $sTerm) !== false;
        if ($bPlainHit || mb_stripos($t_sToken, $sTerm) !== false) {
            $aTokens[$t_iKey] = '<strong>' . $t_sToken . '</strong>';
        }
    }

    return implode('', $aTokens);
}
/////////////////////////////// Test \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
// Demo fixtures: each key is the search term, each value the text searched.
$aTest = [
    'goo'  => 'I love Google to make my searches, but I`m starting to worry about privacy.',
    'peo'  => 'people, People, PEOPLE, peOple, people!, people., people?, "people, people" péo',
    'péo'  => 'people, People, PEOPLE, peOple, people!, people., people?, "people, people" péo',
    'gen'  => '"gente", "inteligente", "VAGENS", and "Gente" ...vocês da física que passam o dia protegendo...',
    'voce' => '...vocês da física que passam o dia protegendo...',
    'o'    => 'Characters like æ,ø,å,Æ,Ø and Å are used in Denmark, Sweden and Norway',
    'ø'    => 'Characters like æ,ø,å,Æ,Ø and Å are used in Denmark, Sweden and Norway',
    'ae'   => 'Characters like æ,ø,å,Æ,Ø and Å are used in Denmark, Sweden and Norway',
    'Æ'    => 'Characters like æ,ø,å,Æ,Ø and Å are used in Denmark, Sweden and Norway',
];

// Render one <dt>/<dd> pair per fixture, then emit the whole definition list.
$aEntries = [];
foreach ($aTest as $sTerm => $sText) {
    $aEntries[] = '<dt>' . $sTerm . '</dt><dd>' . emphasiseWord($sText, $sTerm) . '</dd>';
}

$sContent = '<dl>' . implode('', $aEntries) . '</dl>';
echo $sContent;
?>