0

因此,我正在尝试根据活动标题对运动进行自动分类。

它基本上可以工作,但我认为应该有更好、更可靠的方法来做到这一点。对于某些运动(例如 FIFA),它输出的类型是 NCAA 而不是 FIFA;MMA 以及另外一两个类型也有同样的问题。

这是我的代码(注意: str_contains是我正在使用的 laravel 辅助函数)。

// Classify an event title into a sport $category and a league/competition
// $type by testing the lowercased title against keywords. Branches are tried
// top to bottom and the first branch whose condition matches wins.
// NOTE(review): there is no final else, so $category and $type are not
// assigned by this chain when no keyword matches.
$strTitle = strtolower($title);
      if(str_contains($strTitle, 'mlb') || str_contains($strTitle, 'baseball')) {
        $category = 'Baseball';
        $type = 'MLB';
      } elseif (str_contains($strTitle, 'nba') || str_contains($strTitle, 'fiba') || str_contains($strTitle, 'basketball') || str_contains($strTitle, 'wnba')) {
        $category = 'Basketball';
        // NOTE(review): this nested ?: chain has no parentheses. PHP before 8.0
        // groups ?: left-to-right, so the chain collapses to one of its LAST TWO
        // values ('WNBA' or 'Basketball' here) instead of the first matching one;
        // PHP 8.0+ rejects the unparenthesized form at compile time. The same
        // applies to every chained ternary further down.
        // NOTE(review): 'wnba' contains 'nba', so the 'nba' test already matches
        // any title the 'wnba' test would (assuming str_contains is a plain
        // substring test — confirm against the helper).
        $type = (str_contains($strTitle, 'nba')) ? 'NBA':
                  (str_contains($strTitle, 'fiba')) ? 'FIBA':
                    (str_contains($strTitle, 'wnba')) ? 'WNBA':'Basketball';
      } elseif (str_contains($strTitle, 'nhl') || str_contains($strTitle, 'hockey')) {
        $category = 'Hockey';
        $type = 'NHL';
      } elseif (str_contains($strTitle, 'nascar') || str_contains($strTitle, 'formula one') || str_contains($strTitle, 'gp2') || str_contains($strTitle, 'gp3') || str_contains($strTitle, 'motogp') || str_contains($strTitle, 'moto2') || str_contains($strTitle, 'moto3') || str_contains($strTitle, 'f1')) {
        $category = 'Motor Sport';
        // Unparenthesized ternary chain — see the note in the Basketball branch.
        $type = (str_contains($strTitle, 'nascar')) ? 'NASCAR':
                  (str_contains($strTitle, 'gp2')) ? 'GP2':
                    (str_contains($strTitle, 'gp3')) ? 'GP3':
                      (str_contains($strTitle, 'motogp')) ? 'MotoGP':
                        (str_contains($strTitle, 'moto2')) ? 'Moto2':
                          (str_contains($strTitle, 'moto3')) ? 'Moto3':
                            (str_contains($strTitle, 'f1') || str_contains($strTitle, 'formula one')) ? 'F1':'Motor Sport';
      } elseif (str_contains($strTitle, 'nfl') || str_contains($strTitle, 'afl') || str_contains($strTitle, 'welsh premier league') || str_contains($strTitle, 'fox college') || str_contains($strTitle, 'football') || str_contains($strTitle, 'serie') || str_contains($strTitle, 'soccer') || str_contains($strTitle, 'fifa') || str_contains($strTitle, 'ncaa')) {
        $category = 'Football';
        // Unparenthesized ternary chain — see the note in the Basketball branch.
        // With left-to-right grouping this can only yield 'NCAA' or 'Football',
        // which is why a FIFA title is reported as NCAA.
        $type = (str_contains($strTitle, 'nfl')) ? 'NFL':
                  (str_contains($strTitle, 'fifa')) ? 'FIFA':
                    (str_contains($strTitle, 'afl')) ? 'AFL':
                      (str_contains($strTitle, 'welsh premier league')) ? 'Welsh Premier League':
                        (str_contains($strTitle, 'ncaa')) ? 'NCAA':'Football';
      } elseif (str_contains($strTitle, 'tennis')) {
        $category = 'Tennis';
        $type = 'Tennis';
      }  elseif (str_contains($strTitle, 'golf')) {
        $category = 'Golf';
        $type = 'Golf';
      } elseif (str_contains($strTitle, 'rugby') || str_contains($strTitle, 'nrl')) {
        $category = 'Rugby';
        // Single ternary: no chaining, so no associativity problem here.
        $type = (str_contains($strTitle, 'nrl')) ? 'NRL' : 'Rugby';
      } elseif (str_contains($strTitle, 'sailing') || str_contains($strTitle, 'america\'s cup')) {
        $category = 'Water Sport';
        $type = 'Sailing';
      } elseif (str_contains($strTitle, 'boxing') || str_contains($strTitle, 'fight night') || str_contains($strTitle, 'fighting') || str_contains($strTitle, 'wwe') || str_contains($strTitle, 'smackdown') || str_contains($strTitle, 'raw') || str_contains($strTitle, 'wwe main event') || str_contains($strTitle, 'mma') || str_contains($strTitle, 'strikeforce') || str_contains($strTitle, 'tna')) {
        $category = 'Boxing';
        // Unparenthesized ternary chain — see the note in the Basketball branch.
        // NOTE(review): 'ufc' is tested below but is not among the keywords in
        // the branch condition above, so a title matching only 'ufc' does not
        // satisfy that condition.
        // NOTE(review): 'raw' is a bare substring; presumably it also matches
        // inside longer words such as 'draw' — confirm against the helper.
        $type = (str_contains($strTitle, 'ufc')) ? 'UFC' :
                  (str_contains($strTitle, 'smackdown')) ? 'WWE Smackdown' :
                    (str_contains($strTitle, 'raw')) ? 'WWE RAW' :
                      (str_contains($strTitle, 'wwe main event')) ? 'WWE Main Event':
                        (str_contains($strTitle, 'wwe')) ? 'WWE':
                          (str_contains($strTitle, 'mma')) ? 'MMA':
                            (str_contains($strTitle, 'tna')) ? 'TNA':
                              (str_contains($strTitle, 'strikeforce')) ? 'Strikeforce':
                                (str_contains($strTitle, 'fight night')) ? 'Fight Night':
                                  (str_contains($strTitle, 'fighting')) ? 'Fighting':'Boxing';
      } elseif (str_contains($strTitle, 'cricket') || str_contains($strTitle, 'icc') || str_contains($strTitle, 'mcc') || str_contains($strTitle, 'odi') || str_contains($strTitle, 'ipl') || str_contains($strTitle, 't20') || str_contains($strTitle, 'twenty20')) {
        $category = 'Cricket';
        // Unparenthesized ternary chain — see the note in the Basketball branch.
        $type = (str_contains($strTitle, 'icc')) ? 'ICC' :
                  (str_contains($strTitle, 'mcc')) ? 'MCC' :
                    (str_contains($strTitle, 'odi')) ? 'ODI':
                      (str_contains($strTitle, 'ipl')) ? 'IPL':
                        (str_contains($strTitle, 't20')) ? 'T20':
                          (str_contains($strTitle, 'twenty20')) ? 'Twenty20':'Cricket';
      }

注意 2:这不是完整的代码,也没有涵盖所有运动,只包含我目前已有的那些。

4

1 回答 1

2

虽然远非理想的解决方案,但这是我拼凑出来的一段代码:它会产生相同的结果,性能影响可能也差不多(我并不确定,只是猜测),但可读性要高得多得多。

在此之前,说真的:看看你那一大堆三元条件表达式。这看起来像是个好主意吗?!

注意事项

  1. str_contains()使用 PHP 的strpos(),它区分大小写。您需要牢记这一点,或者在搜索/比较之前将整个字符串小写。

  2. 同样,strpos() 并不在乎关键字是否出现在另一个单词/字符串的内部。因此,例如,如果标题包含“WNBA”,关键字“NBA”会先匹配上,检查随即结束,从而得到意料之外的结果。解决办法是按照从最长、最具体到最短、最模糊的顺序排列关键字。

  3. 除非您使用的关键字数量极其庞大(我是说非常非常多),否则这里的性能并不算太糟。不过,仅仅由于顺序的原因,您仍有可能遍历了 10-20 组关键字却找不到匹配项。除了使用基于文本的搜索软件(例如 Sphinx、Lucene、Solr、基于数据库的全文搜索等)之外,我没有真正好的、直接的解决方案,但请记住这一点。


// Sports and their keywords, kept as data instead of hardcoded conditionals.
// Each entry is an object (I like objects) with:
//   category - human-readable sport name
//   keywords - map of lowercase search keyword => human-readable type
// This could just as well be a JSON or XML feed, generated through an API
// or loaded from your DB.
//
// Matching is a plain substring test and the first hit wins, so ORDER MATTERS:
// within a sport, list the longest / most specific keywords first
// ('wnba' before 'nba', league names before the generic sport name).
$sports = [
    (object) [
        'category'  => 'Baseball',
        'keywords'  => [
            'mlb'       => 'MLB',
            'baseball'  => 'Baseball',
        ],
    ],
    (object) [
        'category'  => 'Basketball',
        'keywords'  => [
            // 'wnba' must precede 'nba': every title containing 'wnba'
            // also contains 'nba'.
            'wnba'          => 'WNBA',
            'nba'           => 'NBA',
            'fiba'          => 'FIBA',
            'basketball'    => 'Basketball',
        ],
    ],
    (object) [
        'category'  => 'Motor Sport',
        'keywords'  => [
            'nascar'        => 'NASCAR',
            'gp2'           => 'GP2',
            'gp3'           => 'GP3',
            'motogp'        => 'MotoGP',
            'moto2'         => 'Moto2',
            'moto3'         => 'Moto3',
            'formula one'   => 'Formula 1',
            'f1'            => 'Formula 1',
        ],
    ],
];

// Keywords are lowercase, so lowercase the title before searching.
$title = strtolower("Rookie player Cryode injured in bizarre FIBA accident.");

// Defaults; both stay null when no keyword matches.
$sport_category = null;
$sport_type = null;

// Step 1: Loop each sport.
foreach ($sports as $sport)
{
    // Step 2: Try this sport's keywords in the order they are listed.
    foreach ($sport->keywords as $key_search => $key_type)
    {
        if (str_contains($title, $key_search))
        {
            // Step 3: We've found the matching keyword.
            // Define the info we need from it...
            $sport_category = $sport->category;
            $sport_type = $key_type;

            // ... then break BOTH loops so the first match wins.
            break 2;
        }
    }
}

// Step 4: Check for no matches here by seeing
// if the category or type is still null.
// Or, initially set vars to default values.

var_dump($sport_category, $sport_type);
于 2013-09-07T00:31:08.130 回答