0

我想知道,对于用户在 Magento 搜索引擎中输入的搜索字符串,是否可以检测其中的词形变化(例如 dog/dogs)、删除不重要的词(例如 “Made in the USA” 中的 “in” 和 “the” 并不重要)等,而无需在一大段 PHP 代码中硬编码如此多的场景。我可以在一定程度上自行处理这个搜索字符串,但那样的代码会显得杂乱而丑陋。

使其成为“智能”搜索引擎的任何建议或指示?

4

1 回答 1

0

使用这个类:

/**
 * English noun inflector: converts words between singular and plural forms.
 *
 * Lookup order for both directions is:
 *   1. uncountable words (returned unchanged),
 *   2. caller-supplied irregular words from the optional global
 *      $irregularWords (singular => plural), if that global is an array,
 *   3. the built-in irregular words in self::$irregular,
 *   4. the ordered regular-expression rules in self::$plural / self::$singular.
 *
 * The first rule that matches wins, so the order of entries in the rule
 * tables is significant.
 */
class Inflection
{
    /**
     * Pluralization rules (regex => replacement), tried top to bottom.
     * The final '/$/' entry is the catch-all that simply appends "s".
     */
    public static $plural = array(
        '/(quiz)$/i'                     => '$1zes',
        '/^(ox)$/i'                      => '$1en',
        // [ml], not [m|l]: inside a character class "|" is a literal pipe.
        '/([ml])ouse$/i'                 => '$1ice',
        // The suffix alternation must be grouped; without the group the
        // pattern means "(matr|vert|ind)ix" OR "ex$" and mangles every
        // word ending in "ex" (e.g. "reflex" became "reflices").
        '/(matr|vert|ind)(?:ix|ex)$/i'   => '$1ices',
        '/(x|ch|ss|sh)$/i'               => '$1es',
        '/([^aeiouy]|qu)y$/i'            => '$1ies',
        '/(hive)$/i'                     => '$1s',
        '/(?:([^f])fe|([lr])f)$/i'       => '$1$2ves',
        '/(shea|lea|loa|thie)f$/i'       => '$1ves',
        '/sis$/i'                        => 'ses',
        '/([ti])um$/i'                   => '$1a',
        '/(tomat|potat|ech|her|vet)o$/i' => '$1oes',
        '/(bu)s$/i'                      => '$1ses',
        '/(alias)$/i'                    => '$1es',
        '/(octop)us$/i'                  => '$1i',
        '/(ax|test)is$/i'                => '$1es',
        '/(us)$/i'                       => '$1es',
        '/s$/i'                          => 's',
        '/$/'                            => 's'
    );

    /**
     * Singularization rules (regex => replacement), tried top to bottom.
     * The final '/s$/i' entry is the catch-all that strips a trailing "s".
     */
    public static $singular = array(
        '/(quiz)zes$/i'              => '$1',
        '/(matr)ices$/i'             => '$1ix',
        '/(vert|ind)ices$/i'         => '$1ex',
        '/^(ox)en$/i'                => '$1',
        '/(alias)es$/i'              => '$1',
        '/(octop|vir)i$/i'           => '$1us',
        '/(cris|ax|test)es$/i'       => '$1is',
        '/(shoe)s$/i'                => '$1',
        '/(o)es$/i'                  => '$1',
        '/(bus)es$/i'                => '$1',
        // [ml], not [m|l]: inside a character class "|" is a literal pipe.
        '/([ml])ice$/i'              => '$1ouse',
        '/(x|ch|ss|sh)es$/i'         => '$1',
        '/(m)ovies$/i'               => '$1ovie',
        '/(s)eries$/i'               => '$1eries',
        '/([^aeiouy]|qu)ies$/i'      => '$1y',
        '/([lr])ves$/i'              => '$1f',
        '/(tive)s$/i'                => '$1',
        '/(hive)s$/i'                => '$1',
        '/(li|wi|kni)ves$/i'         => '$1fe',
        '/(shea|loa|lea|thie)ves$/i' => '$1f',
        '/(^analy)ses$/i'            => '$1sis',
        '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '$1$2sis',
        '/([ti])a$/i'                => '$1um',
        '/(n)ews$/i'                 => '$1ews',
        '/(h|bl)ouses$/i'            => '$1ouse',
        '/(corpse)s$/i'              => '$1',
        '/(us)es$/i'                 => '$1',
        '/s$/i'                      => ''
    );

    /**
     * Built-in irregular words, keyed singular => plural.
     */
    public static $irregular = array(
        'move'   => 'moves',
        'foot'   => 'feet',
        'goose'  => 'geese',
        'sex'    => 'sexes',
        'child'  => 'children',
        'man'    => 'men',
        'tooth'  => 'teeth',
        'person' => 'people',
        'admin'  => 'admin'
    );

    /**
     * Words whose singular and plural forms are identical (lowercase).
     */
    public static $uncountable = array(
        'sheep',
        'fish',
        'deer',
        'series',
        'species',
        'money',
        'rice',
        'information',
        'equipment'
    );

    /**
     * Returns the plural form of a word.
     *
     * @param string $string Singular word.
     * @return string Plural form; uncountable words are returned unchanged.
     */
    public static function pluralize( $string )
    {
        return self::inflect( $string, self::$plural, true );
    }

    /**
     * Returns the singular form of a word.
     *
     * @param string $string Plural word.
     * @return string Singular form; uncountable words are returned unchanged.
     */
    public static function singularize( $string )
    {
        return self::inflect( $string, self::$singular, false );
    }

    /**
     * Formats a count followed by the word, pluralizing the word unless the
     * count equals 1.
     *
     * @param int|string $count  Quantity (compared loosely against 1).
     * @param string     $string Singular word.
     * @return string E.g. "1 dog" or "3 dogs".
     */
    public static function pluralize_if( $count, $string )
    {
        if ( $count == 1 )
            return "1 $string";

        return $count . " " . self::pluralize( $string );
    }

    /**
     * Shared implementation of pluralize() and singularize().
     *
     * @param string $string   Word to convert.
     * @param array  $rules    Ordered regex => replacement table for the
     *                         requested direction.
     * @param bool   $toPlural True for singular -> plural, false for the
     *                         reverse.
     * @return string Converted word.
     */
    private static function inflect( $string, array $rules, $toPlural )
    {
        // Save some time when singular and plural are the same.
        if ( in_array( strtolower( $string ), self::$uncountable ) )
            return $string;

        // Caller-supplied irregular words take precedence over the
        // built-in ones.
        foreach ( array( self::externalIrregulars(), self::$irregular ) as $words )
        {
            $converted = self::applyIrregular( $string, $words, $toPlural );

            if ( $converted !== null )
                return $converted;
        }

        // Fall back to the regular-expression rules; first match wins.
        foreach ( $rules as $pattern => $replacement )
        {
            if ( preg_match( $pattern, $string ) )
                return preg_replace( $pattern, $replacement, $string );
        }

        return $string;
    }

    /**
     * Returns the optional global $irregularWords map (singular => plural),
     * or an empty array when that global is undefined or not an array.
     *
     * Guarding here avoids the "foreach() argument must be of type
     * array|object" warning that iterating an undefined global raises.
     *
     * @return array
     */
    private static function externalIrregulars()
    {
        if ( isset( $GLOBALS['irregularWords'] ) && is_array( $GLOBALS['irregularWords'] ) )
            return $GLOBALS['irregularWords'];

        return array();
    }

    /**
     * Tries each irregular word as a case-insensitive suffix of $string.
     *
     * Entries are interpolated into the pattern as-is (not quoted), so they
     * are treated as regular-expression fragments.
     *
     * @param string $string   Word to convert.
     * @param array  $words    Map of singular => plural.
     * @param bool   $toPlural Direction of the conversion.
     * @return string|null Converted word, or null when no entry matched.
     */
    private static function applyIrregular( $string, array $words, $toPlural )
    {
        foreach ( $words as $singular => $plural )
        {
            $from    = $toPlural ? $singular : $plural;
            $to      = $toPlural ? $plural : $singular;
            $pattern = '/' . $from . '$/i';

            if ( preg_match( $pattern, $string ) )
                return preg_replace( $pattern, $to, $string );
        }

        return null;
    }
}

如果您有时间,可以了解一下处理词形变化的标准方法:http://en.wikipedia.org/wiki/Inflection

您可以将数组与 XML 相结合,用来存放所有的词形变化数据;也可以参考 CodeIgniter 是如何友好地处理词形变化的:http://ellislab.com/codeigniter/user-guide/helpers/inflector_helper.html

许多框架支持内置的屈折变化,但它主要只关注英语。对于其他语言,您应该编写自己的...或使用 unicode.org 以及其他语言的一些变形标准(如果需要)。

于 2013-03-08T01:02:52.463 回答