请查看以下两个类及其使用信息。它肯定会帮助你。
可读性音节计数模式库类:
<?php
/**
 * Lookup tables for a heuristic English syllable counter.
 *
 * Every pattern entry is a regular-expression fragment WITHOUT delimiters;
 * the consumer is expected to wrap each one in delimiters before matching.
 *
 * All fragments are single-quoted on purpose: inside a double-quoted PHP
 * string "\1" is an octal escape (the byte 0x01), not a regex backreference,
 * which silently breaks any pattern that relies on one.
 */
class ReadabilitySyllableCheckPattern {
    /**
     * Words with a hard-coded syllable count, keyed by the lower-case word.
     *
     * @var array<string, int>
     */
    public $probWords = [
        'abalone' => 4,
        'abare' => 3,
        'abed' => 2,
        'abruzzese' => 4,
        'abbruzzese' => 4,
        'aborigine' => 5,
        'acreage' => 3,
        'adame' => 3,
        'adieu' => 2,
        'adobe' => 3,
        'anemone' => 4,
        'apache' => 3,
        'aphrodite' => 4,
        'apostrophe' => 4,
        'ariadne' => 4,
        'cafe' => 2,
        'calliope' => 4,
        'catastrophe' => 4,
        'chile' => 2,
        'chloe' => 2,
        'circe' => 2,
        'coyote' => 3,
        'epitome' => 4,
        'forever' => 3,
        'gethsemane' => 4,
        'guacamole' => 4,
        'hyperbole' => 4,
        'jesse' => 2,
        'jukebox' => 2,
        'karate' => 3,
        'machete' => 3,
        'maybe' => 2,
        'people' => 2,
        'recipe' => 3,
        'sesame' => 3,
        'shoreline' => 2,
        'simile' => 3,
        'syncope' => 3,
        'tamale' => 3,
        'yosemite' => 4,
        'daphne' => 2,
        'eurydice' => 4,
        'euterpe' => 3,
        'hermione' => 4,
        'penelope' => 4,
        'persephone' => 4,
        'phoebe' => 2,
        'zoe' => 2,
    ];

    /**
     * Fragments whose presence in a word adds one syllable to the estimate.
     *
     * @var string[]
     */
    public $addSyllablePatterns = [
        '([^s]|^)ia',
        'iu',
        'io',
        'eo($|[b-df-hj-np-tv-z])',
        'ii',
        '[ou]a$',
        '[aeiouym]bl$',
        '[aeiou]{3}',
        '[aeiou]y[aeiou]',
        '^mc',
        'ism$',
        'asm$',
        'thm$',
        // Doubled consonant followed by a final "l"; \1 is a regex backreference.
        '([^aeiouy])\1l$',
        '[^l]lien',
        '^coa[dglx].',
        '[^gq]ua[^auieo]',
        'dnt$',
        'uity$',
        '[^aeiouy]ie(r|st|t)$',
        'eings?$',
        '[aeiouy]sh?e[rsd]$',
        'iell',
        'dea$',
        'real',
        '[^aeiou]y[ae]',
        'gean$',
        'riet',
        'dien',
        'uen',
    ];

    /**
     * Prefix (anchored with ^) and suffix (anchored with $) fragments that are
     * meant to be stripped from a word and counted separately.
     *
     * @var string[]
     */
    public $prefixSuffixPatterns = [
        '^un',
        '^fore',
        '^ware',
        '^none?',
        '^out',
        '^post',
        '^sub',
        '^pre',
        '^pro',
        '^dis',
        '^side',
        'ly$',
        'less$',
        'some$',
        'ful$',
        'ers?$',
        'ness$',
        'cians?$',
        'ments?$',
        'ettes?$',
        'villes?$',
        'ships?$',
        'sides?$',
        'ports?$',
        'shires?$',
        'tion(ed)?$',
    ];

    /**
     * Fragments whose presence in a word removes one syllable from the estimate.
     *
     * @var string[]
     */
    public $subSyllablePatterns = [
        'cia(l|$)',
        'tia',
        'cius',
        'cious',
        '[^aeiou]giu',
        '[aeiouy][^aeiouy]ion',
        'iou',
        'sia$',
        'eous$',
        '[oa]gue$',
        '.[^aeiuoycgltdb]{2,}ed$',
        '.ely$',
        '^jua',
        'uai',
        'eau',
        '[aeiouy](b|c|ch|d|dg|f|g|gh|gn|k|l|ll|lv|m|mm|n|nc|ng|nn|p|r|rc|rn|rs|rv|s|sc|sk|sl|squ|ss|st|t|th|v|y|z)e$',
        '[aeiouy](b|c|ch|dg|f|g|gh|gn|k|l|lch|ll|lv|m|mm|n|nc|ng|nch|nn|p|r|rc|rn|rs|rv|s|sc|sk|sl|squ|ss|th|v|y|z)ed$',
        '[aeiouy](b|ch|d|f|gh|gn|k|l|lch|ll|lv|m|mm|n|nch|nn|p|r|rn|rs|rv|s|sc|sk|sl|squ|ss|st|t|th|v|y)es$',
        '^busi$',
    ];
}
?>
另一个类是可读性算法类,包含两个方法:countSyllable 用于估算单个单词的音节数,calculateReadabilityScore 用于计算文本的可读性分数:
<?php
/**
 * Readability scoring based on the Flesch Reading Ease formula
 * (206.835 - 1.015 * ASL - 84.6 * ASW), using a regex-driven syllable estimate.
 */
class ReadabilityAlgorithm {
    /**
     * Pattern tables, created on first use and reused for every later word.
     *
     * @var ReadabilitySyllableCheckPattern|null
     */
    private $syllablePatterns = null;

    /**
     * Estimates the number of syllables in a single English word.
     *
     * The word is lower-cased and stripped of leading/trailing non-letters
     * (punctuation, digits) before any lookup, because the exception table and
     * the anchored patterns are all written for bare lower-case words.
     *
     * @param string $strWord The word to analyse.
     * @return int 0 when the input contains no letters, otherwise at least 1.
     */
    public function countSyllable($strWord) {
        $pattern = $this->getSyllablePatterns();

        $strWord = strtolower(trim((string) $strWord));
        $strWord = preg_replace('`^[^a-z]+|[^a-z]+$`', '', $strWord);
        if ($strWord === '') {
            // Nothing pronounceable (empty token, bare number, stray punctuation).
            return 0;
        }

        // Known problem words carry a hard-coded count.
        if (isset($pattern->probWords[$strWord])) {
            return $pattern->probWords[$strWord];
        }

        // The prefix/suffix entries are regex fragments, so they must go through
        // preg_replace; str_replace would look for the literal text "^un", "ly$", ...
        $arrPrefixSuffix = array();
        foreach ($pattern->prefixSuffixPatterns as $strFragment) {
            $arrPrefixSuffix[] = '`' . $strFragment . '`';
        }
        $strWord = preg_replace($arrPrefixSuffix, '', $strWord, -1, $intPrefixSuffixCount);

        // Each run of vowels in what is left counts as one syllable; every
        // stripped prefix/suffix contributes one more.
        $arrWordParts = preg_split('`[^aeiouy]+`', $strWord, -1, PREG_SPLIT_NO_EMPTY);
        $intSyllableCount = count($arrWordParts) + $intPrefixSuffixCount;

        // Adjust for letter combinations that are over- or under-counted above.
        foreach ($pattern->subSyllablePatterns as $strSyllable) {
            $intSyllableCount -= preg_match('`' . $strSyllable . '`', $strWord);
        }
        foreach ($pattern->addSyllablePatterns as $strSyllable) {
            $intSyllableCount += preg_match('`' . $strSyllable . '`', $strWord);
        }

        // A real word has at least one syllable, even if the adjustments
        // pushed the estimate to zero or below.
        return max(1, $intSyllableCount);
    }

    /**
     * Computes the readability score of a text.
     *
     * Words are taken from str_word_count() so that the word total and the
     * syllable total are computed over exactly the same tokens. A sentence is
     * any stretch of text containing at least one word, delimited by runs of
     * '.', '!', '?', ':' or ';'.
     *
     * @param string $stringText The text to score.
     * @return float The score; 206.835 (both averages taken as zero) when the
     *               text contains no words.
     */
    public function calculateReadabilityScore($stringText) {
        $stringText = (string) $stringText;

        $arrWords = str_word_count($stringText, 1);
        $totalWords = count($arrWords);
        if ($totalWords === 0) {
            // No words: avoid dividing by zero and treat both averages as 0.
            return 206.835;
        }

        // Count only segments that contain a word, so a trailing terminator or
        // an ellipsis does not create phantom sentences. With at least one word
        // present this is always >= 1.
        $totalSentences = 0;
        foreach (preg_split('`[.!?:;]+`', $stringText, -1, PREG_SPLIT_NO_EMPTY) as $strSegment) {
            if (str_word_count($strSegment) > 0) {
                $totalSentences++;
            }
        }

        // Average sentence length (words per sentence).
        $ASL = $totalWords / $totalSentences;

        $syllableCount = 0;
        foreach ($arrWords as $strWord) {
            $syllableCount += $this->countSyllable($strWord);
        }

        // Average syllables per word.
        $ASW = $syllableCount / $totalWords;

        return 206.835 - (1.015 * $ASL) - (84.6 * $ASW);
    }

    /**
     * Returns the shared pattern tables, building them on first use.
     *
     * @return ReadabilitySyllableCheckPattern
     */
    private function getSyllablePatterns() {
        if ($this->syllablePatterns === null) {
            $this->syllablePatterns = new ReadabilitySyllableCheckPattern();
        }
        return $this->syllablePatterns;
    }
}
?>
// 示例:如何使用
<?php
// Usage example: score a sample passage and print the resulting number.
$readObj = new ReadabilityAlgorithm();
$sampleText = "Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into: electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently; with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum!";
echo $readObj->calculateReadabilityScore($sampleText);
?>