我现在正在做的是读取文本文件的内容并将其存储在一个变量中。读取全部内容后,我为块数据运行一个循环,并在其中调用一个函数,该函数将读取块数据的每一行并将每一行传递给另一个函数,该函数处理每列数据的处理并将其插入批量数据库。批次是整个块。
对于每个大小超过 500KB 的文件,代码过程太长。我的问题是文本文件中没有我可以使用的唯一标识符,因此我可以应用“加载数据输入文件”,这使我处于按块处理文本文件的情况。
700K 几乎花了一整天的时间来处理,但这仍然取决于机器规格。代码在 CentOS 中运行。在处理完第一个文本文件后,下一个 800KB++ 大小的文本文件花了将近一周的时间来处理。在这些与其他大小超过 800KB 的文本文件一起使用时,需要花费几乎或一周以上的时间来处理,尤其是 1MB 大小的文件。
有人可以告诉我我做错了什么以及我需要哪些选项来使我的代码有效运行。
/*
====================================================================================
RECORDS FETCH
====================================================================================
Needs path and filename with extension.
The page do an iteration of records in the file by line.
After by line, it iterates again per delimiter ","..
It concatenates the part of the records for bulk insert process.
PID address is incremental, every three PID correspond to one Chamber
and the reading in each Chamber is CO2 for first PID address, RH for the
second PID address, TEMP for the third PID address.
====================================================================================
*/
$path = "";
$filename = "";
error_reporting(0);
include_once ("connect.php");
$p_results = mysql_query("SELECT PATH, FILENAME FROM tbl_path");
if(mysql_num_rows($p_results) > 0 ){
while ( $rows = mysql_fetch_assoc($p_results) )
{
$path = $rows['PATH'];
$filename = $rows['FILENAME'];
}
}
else
{
mysql_query("INSERT INTO tbl_log (LOG_DATE, DETAILS) VALUES ( NOW(), 'There is no path and filename to be accessed. Please provide.' )");
}
$path = str_replace('\\','/',$path);
//holds the path..NOTE: Change backslash (\) to forward slash (/)
//$path = "E:/";
//holds the filename.. NOTE: Include the file extension
//$filename = "sample2.txt"; //"chamber_1_con.txt";
if ($path <> "" && $filename <> "")
is_connected($path, $filename);
echo ('<script language="javascript">window.location = "chambers_monitoring.php" </script>');
//function for DB writing in table data
function InsertData($rec, &$errorDataCnt, &$sql, $y, $i, $x, &$dCnt)
{
$dDate = (!isset($rec[0]) ? 0 : (trim($rec[0]) == "" ? 0 : trim($rec[0])));
$dTime = (!isset($rec[1]) ? 0 : (trim($rec[1]) == "" ? 0 : trim($rec[1])));
$address = (!isset($rec[2]) ? 0 : (trim($rec[2]) == "" ? 0 : trim($rec[2])));
$co2SV = (!isset($rec[3]) ? 0 : (trim($rec[3]) == "" ? 0 : trim($rec[3])));
$co2PV = (!isset($rec[4]) ? 0 : (trim($rec[4]) == "" ? 0 : trim($rec[4])));
$tempSV = (!isset($rec[5]) ? 0 : (trim($rec[5]) == "" ? 0 : trim($rec[5])));
$tempPV = (!isset($rec[6]) ? 0 : (trim($rec[6]) == "" ? 0 : trim($rec[6])));
$rhSV = (!isset($rec[7]) ? 0 : (trim($rec[7]) == "" ? 0 : trim($rec[7])));
$rhPV = (!isset($rec[8]) ? 0 : (trim($rec[8]) == "" ? 0 : trim($rec[8])));
/* include('connect.php'); */
set_time_limit(36000);
ini_set('max_execution_time','43200');
$e_results = mysql_query("SELECT ID FROM tbl_reading WHERE (READING_DATE = '".date("Y-m-d",strtotime($dDate))."' AND READING_TIME = '".date("H:i:s",strtotime($dTime))."') AND READING_ADDRESS = $address LIMIT 1");
if(mysql_num_rows($e_results) <= 0 ){
if (!($dDate == 0 || $dTime == 0 || $address == 0) ) {
if ($y == 0){
$sql = "INSERT INTO tbl_reading (READING_DATE, READING_TIME, READING_ADDRESS, CO2_SET_VALUE, CO2_PROCESS_VALUE, TEMP_SET_VALUE, TEMP_PROCESS_VALUE, RH_SET_VALUE, RH_PROCESS_VALUE) VALUES ('".date("Y/m/d",strtotime($dDate))."','".date("H:i:s",strtotime($dTime))."', ". mysql_real_escape_string($address).",". mysql_real_escape_string($co2SV).",". mysql_real_escape_string($co2PV).",". mysql_real_escape_string($tempSV).",". mysql_real_escape_string($tempPV).",". mysql_real_escape_string($rhSV).",". mysql_real_escape_string($rhPV).")";
}
else {
$sql .= ", ('".date("Y/m/d",strtotime($dDate))."','".date("H:i:s",strtotime($dTime))."', ". mysql_real_escape_string($address).",". mysql_real_escape_string($co2SV).",". mysql_real_escape_string($co2PV).",". mysql_real_escape_string($tempSV).",". mysql_real_escape_string($tempPV).",". mysql_real_escape_string($rhSV).",". mysql_real_escape_string($rhPV).")";
}
}
}
if(($x + 1) == $i){
//echo ($x + 1)." = ".$i."<br>";
if (substr($sql, 0, 1) == ",")
$sql = "INSERT INTO tbl_reading (READING_DATE, READING_TIME, READING_ADDRESS, CO2_SET_VALUE, CO2_PROCESS_VALUE, TEMP_SET_VALUE, TEMP_PROCESS_VALUE, RH_SET_VALUE, RH_PROCESS_VALUE) VALUES".substr($sql, 1);
//echo $sql."<br>";
set_time_limit(36000);
try {
$result = mysql_query($sql) ;
$dCnt = mysql_affected_rows();
if( $dCnt == 0)
{
$errorDataCnt = $errorDataCnt + 1;
}
}
catch (Exception $e)
{
mysql_query("INSERT INTO tbl_log (LOG_DATE, DETAILS) VALUES ( NOW(), '".$e->getMessage()."' )");
}
//mysql_free_result($result);
}
unset($dDate);
unset($dTime);
unset($address);
unset($co2SV);
unset($co2PV);
unset($tempSV);
unset($tempPV);
unset($rhSV);
unset($rhPV);
}
//function for looping into the records per line
function loop($data)
{
$errorDataCnt = 0; $sql = ""; $exist = 0;
$i = count( $data); $x = 0; $y = 0; $tmpAdd = ""; $cnt = 0; $t = 0; $dCnt = 0;
ini_set('max_execution_time','43200');
while($x < $i)
{
$rec = explode(",", $data[$x]);
InsertData($rec, $errorDataCnt, $sql, $y, $i, $x, $dCnt);
$x++;
$y++;
unset($rec);
}
$errFetch = ($i - $dCnt);
if($errorDataCnt > 0)
mysql_query("INSERT INTO tbl_log (LOG_DATE, DETAILS) VALUES ( NOW(), 'Error inserting $errFetch records. Check if there is a NULL or empty value or if it is the correct data type.' )");
if($dCnt > 0)
mysql_query("INSERT INTO tbl_log (LOG_DATE, DETAILS) VALUES ( NOW(), 'Saved $dCnt of $i records into the database. Total $exist records already existing in the database.' )");
}
// functions in looping records and passing into $contents variable
function DataLoop($file)
{
ini_set("auto_detect_line_endings", true);
set_time_limit(36000);
ini_set('max_execution_time','43200');
$contents = ''; $j = 0;
if ($handle = fopen($file,"rb")){
while (!feof($handle)) {
$rdata = fgets($handle, 3359232);//filesize($file));
//$rdata = fread($handle, filesize($file));
if(trim($rdata) != "" || $rdata === FALSE){
if (feof($handle)) break;
else {
$contents .= $rdata;
$j = $j + 1; }}
}
fclose($handle);
$data = explode("\n", $contents);
unset($contents);
unset($rdata);
}
/* echo count($contents)." ".count($data); */
/* $query = "SELECT MAX(`ID`) AS `max` FROM `tbl_reading`";
$result = mysql_query($query) or die(mysql_error());
$row = mysql_fetch_assoc($result);
$max = $row['max']; */
/* $res = mysql_fetch_assoc(mysql_query("SELECT COUNT(*) as total FROM tbl_reading")) or die(mysql_error());
echo "<script>alert('".$res['total']."')</script>"; */
$p = 0;
ini_set('memory_limit','512M');
if($j != 0)
{
foreach(array_chunk($data, ceil(count($data)/200)) as $rec_data){
loop($rec_data);
$p++;
}
}
}
//function to test if filename exists
function IsExist($file)
{
if ($con = fopen($file, "r"))// file_exists($file))
{
fclose($con);
DataLoop($file);
}
else
mysql_query("INSERT INTO tbl_log (LOG_DATE, DETAILS) VALUES ( NOW(), '$filename is not existing in $path. Check if the filename or the path is correct.' )");
}
//function to test connection to where the file is.
function is_connected($path, $filename)
{
//check to see if the local machine is connected to the network
$errno = ""; $errstr = "";
if (substr(trim($path), -1) == '/')
$file = $path.$filename;
else
$file = $path."/".$filename;
IsExist($file);
}