0

你能帮我找出导致这个进程达到 500MB 内存使用量的原因吗?它基本上是一个 html 页面下载器。尽管该过程是稳定的(并且不超过该限制),但它意味着在性能低下的机器上使用,我并不满意。mysql 表“站点”的大小为 170MB。遵循脚本代码。提前致谢。

function start() {
try {
    global $log;
    $db = getConnection();
    Zend_Db_Table::setDefaultAdapter($db);
    $log->logInfo("logger start");
    while (1) {
        $sitesTable = new Zend_Db_Table('Sites');
        $rowset = $sitesTable->fetchAll();
        foreach ($rowset as $row) {
            if (time() >= (strtotime($row->lastUpdate) + $row->pollingHours * 60 * 60)) {
                db_updateHtml($row);
            }
        }
    }
} catch (Exception $e) {
    global $log;
    $log->logError($e->getMessage());
}
}

    function db_updateHtml($siteRecord) {
        try {
            if ($siteRecord instanceof Zend_Db_Table_Row) {
                $rowwithConnection = $siteRecord;
                $url = $siteRecord->url;

                $idSite = $siteRecord->idSite;
                $crawler = new Crawler();
                $sitesTable = new Zend_Db_Table('Sites');
                //$rowwithConnection = $sitesTable->fetchRow(
                  //      $sitesTable->select()->where('idSite = ?', $idSite));
                $newHtml = HtmlDbEncode($crawler->get_web_page($url));


                if (strlen($newHtml) < 10) {
                    global $log;
                    $log->logError("Download failed for: url: $url \t idsite: $idSite ");
                }
                if ($rowwithConnection->isChecked != 0) {
                    $rowwithConnection->oldHtml = $rowwithConnection->newHtml;
                    $rowwithConnection->isChecked = 0;
                }
                $rowwithConnection->newHtml = $crawler->get_web_page($url);
                $rowwithConnection->lastUpdate = date("Y-m-d H:i:s");
                //$rowwithConnection->diffHtml = getDiff($rowwithConnection->oldHtml, $rowwithConnection->newHtml, false, $rowwithConnection->minLengthChange);
                $rowwithConnection->diffHtml = getDiffFromRecord($rowwithConnection, false, $rowwithConnection->minLengthChange);
                /* if (strlen($rowwithConnection->diffHtml) > 30) {
                  $rowwithConnection->lastChanged = $rowwithConnection->lastUpdate;
                  } */
                $rowwithConnection->save();
            } else {
                $log->logCrit("siteRecord is uninitialized");
            }
        } catch (Exception $e) {
            global $log;
            $log->logError($e->getMessage());
        }
    }

    function getDiffFromRecord($row, $force = false, $minLengthChange = 100) {
        if ($row instanceof Zend_Db_Table_Row) {
            require_once '/var/www/diff/library/finediff.php';
            include_once '/var/www/diff/library/Text/Diff.php';
            $diff = new AndreaDiff();
            $differences = $diff->getDiff($row->oldHtml, $row->newHtml);
            if ($diff->isChanged($minLengthChange) || $force) {
                $row->lastChanged = $row->lastUpdate;
                $row->isChecked = false;
                return ($differences);
            }
        }
        return null;
    }

    function getConnection() {
        try {
            $pdoParams = array(
                PDO::MYSQL_ATTR_USE_BUFFERED_QUERY => true
            );
            $db = new Zend_Db_Adapter_Pdo_Mysql(array(
                        'host' => '127.0.0.1',
                        'username' => 'root',
                        'password' => 'administrator',
                        'dbname' => 'diff',
                        'driver_options' => $pdoParams
                    ));
            return $db;
        } catch (Exception $e) {
            global $log;
            $log->logError($e->getMessage());
        }

    }
4

1 回答 1

0

1)尝试使用 fetch 方法,而不是 fetchAll:

foreach($sitesTable->fetch() as $row){
   //...
}

2)尝试取消设置存储html代码的所有变量(如果你将它保存在内存中),最后一次迭代我想变量$rowwithConnection里面会有html代码。

当我想要配置文件 php 应用程序时,我使用xhprof它将为您节省很多时间。祝你好运!

于 2012-10-02T07:36:19.280 回答