我试图创建一个类,它接受一个短语列表并将其与用户输入进行比较。我试图做的是让像 Porshre Ceyman 这样的东西更正到保时捷 Cayman。
此类需要一组正确的术语 $this->full_model_list 和一组用户输入 $search_terms。我取出了结构,因此您需要传入 full_model_list。请注意,这并没有完全奏效,所以我决定废弃它,它改编自希望纠正大句子的人......
你会这样称呼它:
$sth = new SearchTermHelper;
$resArr = $sth->spellCheckModelKeywords($search_terms)
代码(非常测试版):
<?php
/*
// ---------------------------------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------------------------------
//
// FUNCTION: Search Term Helper Class
// PURPOSE: Handles finding matches and such with search terms for keyword searching.
// DETAILS: Functions below build search combinations, find matches, look for spelling issues in words etc.
//
// ---------------------------------------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------------------------------------
*/
class SearchTermHelper
{
public $full_model_list;
private $inv;
// --------------------------------------------------------------------------------------------------------------
// -- return an array of metaphones for each word in a string
// --------------------------------------------------------------------------------------------------------------
private function getMetaPhone($phrase)
{
$metaphones = array();
$words = str_word_count($phrase, 1);
foreach ($words as $word) {
$metaphones[] = metaphone($word);
}
return $metaphones;
}
// --------------------------------------------------------------------------------------------------------------
// -- return the closest matching string found in $this->searchAgainst when compared to $this->input
// --------------------------------------------------------------------------------------------------------------
public function findBestMatchReturnString($searchAgainst, $input, $max_tolerance = 200, $max_length_diff = 200, $min_str = 3, $lower_case = true, $search_in_phrases = true)
{
if (empty($searchAgainst) || empty($input)) return "";
//weed out strings we thing are too small for this
if (strlen($input) <= $min_str) return $input;
$foundbestmatch = -1;
if ($lower_case) $input = strtolower($input);
//sort list or else not best matches may be found first
$counts = array();
foreach ($searchAgainst as $s) {
$counts[] = strlen($s);
}
array_multisort($counts, $searchAgainst);
//get the metaphone equivalent for the input phrase
$tempInput = implode(" ", $this->getMetaPhone($input));
$list = array();
foreach ($searchAgainst as $phrase) {
if ($lower_case) $phrase = strtolower($phrase);
if ($search_in_phrases) $phraseArr = explode(" ",$phrase);
foreach ($phraseArr as $word) {
//get the metaphone equivalent for each phrase we're searching against
$tempSearchAgainst = implode(' ', $this->getMetaPhone($word));
$similarity = levenshtein($tempInput, $tempSearchAgainst);
if ($similarity == 0) // we found an exact match
{
$closest = $word;
$foundbestmatch = 0;
echo "" . $closest . "(" . $foundbestmatch . ") <br>";
break;
}
if ($similarity <= $foundbestmatch || $foundbestmatch < 0) {
$closest = $word;
$foundbestmatch = $similarity;
//keep score
if (array_key_exists($closest, $list)) {
//echo "" . $closest . "(" . $foundbestmatch . ") <br>";
$list[$closest] += 1;
} else {
$list[$closest] = 1;
}
}
}
if ($similarity == 0 || $similarity <= $max_tolerance) break;
}
// if we find a bunch of a value, assume it to be what we wanted
if (!empty($list)) {
if ($most_occuring = array_keys($list, max($list)) && max($list) > 10) {
return $closest;
}
}
//echo "input:".$input."(".$foundbestmatch.") match: ".$closest."\n";
// disallow results to be all that much different in char length (if you want)
if (abs(strlen($closest) - strlen($input)) > $max_length_diff) return "";
// based on tolerance of difference, return if match meets this requirement (0 = exact only 1 = close, 20+ = far)
return ((int)$foundbestmatch <= (int)$max_tolerance) ? $closest : "";
}
// --------------------------------------------------------------------------------------------------------------
// -- Handles passing arrays instead of a string above ( could have done this in the func above )
// --------------------------------------------------------------------------------------------------------------
public function findBestMatchReturnArray($searchAgainst, $inputArray, $max_tolerance = 200, $max_length_diff = 200, $min_str = 3)
{
$results = array();
$tempStr = '';
foreach ($inputArray as $item) {
if ($tmpStr = $this->findBestMatchReturnString($searchAgainst, $item, $max_tolerance, $max_length_diff, $min_str))
$results[] = $tmpStr;
}
return (!empty($results)) ? $results : $results = array();
}
// --------------------------------------------------------------------------------------------------------------
// -- Build combos of search terms -- So we can check Cayman S or S Cayman etc.
// careful, this is very labor intensive ( O(n^k) )
// --------------------------------------------------------------------------------------------------------------
public function buildSearchCombinations(&$set, &$results)
{
for ($i = 0; $i < count($set); $i++) {
$results[] = $set[$i];
$tempset = $set;
array_splice($tempset, $i, 1);
$tempresults = array();
$this->buildSearchCombinations($tempset, $tempresults);
foreach ($tempresults as $res) {
$results[] = trim($set[$i]) . " " . trim($res);
}
}
}
// --------------------------------------------------------------------------------------------------------------
// -- Model match function -- Get best model match from user input.
// --------------------------------------------------------------------------------------------------------------
public function findBestSearchMatches($model_type, $search_terms, $models_list)
{
$partial_search_phrases = array();
if (count($search_terms) > 1) {
$this->buildSearchCombinations($search_terms, $partial_search_phrases); // careful, this is very labor intensive ( O(n^k) )
$partial_search_phrases = array_diff($partial_search_phrases, $search_terms);
for ($i = 0; $i < count($search_terms); $i++) $partial_search_phrases[] = $search_terms[$i];
$partial_search_phrases = array_values($partial_search_phrases);
} else {
$partial_search_phrases = $search_terms;
}
//sort list or else not best matches may be found first
$counts = array();
foreach ($models_list as $m) {
$counts[] = strlen($m);
}
array_multisort($counts,SORT_DESC,$models_list);
unset($counts);
//sort list or else not best matches may be found first
foreach ($partial_search_phrases as $p) {
$counts[] = strlen($p);
}
array_multisort($counts,SORT_DESC,$partial_search_phrases);
$results = array("exact_match" => '', "partial_match" => '');
foreach ($partial_search_phrases as $term) {
foreach ($models_list as $model) {
foreach ($model_type as $mt) {
if (strpos(strtolower($model), strtolower($mt)) !== false) {
if ((strtolower($model) == strtolower($term) || strtolower($model) == strtolower($mt . " " . $term))
) {
// echo " " . $model . " === " . $term . " <br>";
if (strlen($model) > strlen($results['exact_match']) /*|| strtolower($term) != strtolower($mt)*/
) {
$results['exact_match'] = strtolower($model);
return $results;
}
} else if (strpos(strtolower($model), strtolower($term)) !== false) {
if (strlen($term) > strlen($results['partial_match'])
|| strtolower($term) != strtolower($mt)
) {
$results['partial_match'] = $term;
//return $results;
}
}
}
}
}
}
return $results;
}
// --------------------------------------------------------------------------------------------------------------
// -- Get all models in DB for Make (e.g. porsche) (could include multiple makes)
// --------------------------------------------------------------------------------------------------------------
public function initializeFullModelList($make) {
$this->full_model_list = array();
$modelsDB = $this->inv->getAllModelsForMakeAndCounts($make);
foreach ($modelsDB as $m) {
$this->full_model_list[] = $m['model'];
}
}
// --------------------------------------------------------------------------------------------------------------
// -- spell checker -- use algorithm to check model spelling (could expand to include english words)
// --------------------------------------------------------------------------------------------------------------
public function spellCheckModelKeywords($search_terms)
{
// INPUTS: findBestMatchReturnArray($searchList, $inputArray,$tolerance,$differenceLenTolerance,$ignoreStringsOfLengthX,$useLowerCase);
//
// $searchList, - The list of items you want to get a match from
// $inputArray, - The user input value or value array
// $tolerance, - How close do we want the match to be 0 = exact, 1 = close, 2 = less close, etc. 20 = find a match 100% of the time
// $lenTolerance, - the number of characters between input and match allowed, ie. 3 would mean match can be +- 3 in length diff
// $ignoreStrLessEq, - min number of chars that must be before checking (i.e. if 3 ignore anything 3 in length to check)
// $useLowerCase - puts the phrases in lower case for easier matching ( not needed per se )
// $searchInPhrases - compare against every word in searchList (which could be groups of words per array item (so search every word past to function
$tolerance = 0; // 1-2 recommended
$lenTolerance = 1; // 1-3 recommended
$ignoreStrLessEq = 3; // may not want to correct tiny words, 3-4 recommended
$useLowercase = true; // convert to lowercase matching = true
$searchInPhrases = true; //match words not phrases, true recommended
$spell_checked_search_terms = $this->findBestMatchReturnArray($this->full_model_list, $search_terms, $tolerance, $lenTolerance, $ignoreStrLessEq, $useLowercase,$searchInPhrases);
$spell_checked_search_terms = array_values($spell_checked_search_terms);
// return spell checked terms
if (!empty($spell_checked_search_terms)) {
if (strpos(strtolower(implode(" ", $spell_checked_search_terms)), strtolower(implode(" ", $search_terms))) === false //&&
// strlen(implode(" ", $spell_checked_search_terms)) > 4
) {
return $spell_checked_search_terms;
}
}
// or just return search terms as is
return $search_terms;
}
}
?>