
由于函数很长,您实际上可以跳过函数本身,因为它们只是从 HTML 标签中提取特定数据。请尝试针对这种规模的问题提供思路,帮助我完成整个数据库的填充。实际的函数很长,会向 4 个不同的表中插入数据,估计共有 6 万行。我的问题是:即使在函数执行完毕之后,我仍然可以看到新的行被添加到数据库中。我猜 MySQL 的插入速度低于我的循环迭代速度。


/**
 * Crawl company pages one id at a time, resuming after the highest id already
 * stored in kpkt_company_temp, and stop once 25 consecutive ids come back empty.
 *
 * @return void
 */
public function start()
{
    // Remove PHP's execution time limit so the crawl is not cut off mid-run.
    // (ini directive names are case-sensitive and 0, not -1, means "no limit".)
    set_time_limit(0);

    // MAX(id) is NULL on an empty table; the cast maps NULL and '' to 0.
    $lastId = (int) $this->Kpkt_model->last_temp_id();

    // Start with the id that follows the last stored one.
    $i = $lastId + 1;

    // Number of consecutive empty records seen so far.
    $errors = 0;

    while ($errors < 25) {
        if ($this->_scrap_all($i) === 'empty') {
            // An empty record: one step closer to giving up.
            $errors++;
        } else {
            // A real record resets the streak.
            $errors = 0;
        }

        // Always advance, otherwise the same id would be fetched forever.
        $i++;
    }
}


/**
 * Scrape the company page for one PMJU_KOD value, store the company row, and
 * walk the linked project-detail and sales-report pages.
 *
 * Returns the string 'empty' when the page cannot be loaded or the parsed
 * company name is shorter than 4 characters; otherwise returns nothing (null).
 *
 * NOTE(review): only the company row is persisted by the code visible here.
 * The project, development and sales fields are parsed but never handed to a
 * model method - presumably the matching Kpkt_model insert calls were lost;
 * confirm and restore them at the TODO(review) markers.
 *
 * @param int $i Value placed in the PMJU_KOD query parameter.
 * @return string|null
 */
public function _scrap_all($i)
{
    $url = "mydesiredwebsite.com?PMJU_KOD=$i";

    $html = file_get_html($url);
    if (!$html) {
        // Nothing to parse: report the id as empty rather than calling find() on a non-object.
        return 'empty';
    }

    $cells = $this->_clean_cells($html->find('td[class=tdSecondtext1]'));

    $name        = $this->_cell_text($cells, 0);
    $file        = $this->_cell_text($cells, 1);
    $roc         = $this->_cell_text($cells, 2);
    $address     = $this->_cell_text($cells, 3);
    $city        = $this->_cell_text($cells, 4);
    $postcode    = $this->_cell_text($cells, 5);
    $district    = $this->_cell_text($cells, 6);
    $state       = $this->_cell_text($cells, 7);
    $telephone   = $this->_cell_text($cells, 8);
    $fax         = $this->_cell_text($cells, 9);
    $website     = $this->_cell_text($cells, 10);
    $last_update = $this->_cell_text($cells, 11);

    // A name this short means the page held no data (e.g. the site's error page).
    if (strlen($name) < 4) {
        $html->clear();
        return 'empty';
    }

    // The record is not empty: normalise the date and store the company.
    $last_update = $this->_to_sql_date($last_update);

    $this->Kpkt_model->insert_company_temp($name, $file, $roc, $address, $city, $postcode, $district, $state, $telephone, $fax, $website, $last_update);

    // Follow every project-detail link; an empty result simply skips the loop.
    foreach ($html->find('a[href^=DetailProjek.cfm]') as $link) {
        $this->_scrap_project($link->getAttribute('href'));
    }

    // Release the parsed DOM (the "clean the memory" step).
    $html->clear();
}

/**
 * Parse one project-detail page and follow its sales-report links.
 *
 * @param string $anchor Relative href of the project-detail page.
 * @return void
 */
private function _scrap_project($anchor)
{
    // Encode spaces so the href is usable as a URL.
    $anchor = str_replace(' ', '%20', $anchor);

    $html2 = file_get_html("mydesiredwebsite.com/$anchor");
    if (!$html2) {
        return;
    }

    $cells = $this->_clean_cells($html2->find('td[class=tdSecondtext1]'));

    // This is the foreign key back to the company.
    $developer = $this->_cell_text($cells, 1);

    // Primary project details.
    $name           = $this->_cell_text($cells, 12);
    $file           = $this->_cell_text($cells, 13);
    $lot_no         = $this->_cell_text($cells, 14);
    $state          = $this->_cell_text($cells, 15);
    $housing_no     = $this->_cell_text($cells, 16);
    $bank_name      = $this->_cell_text($cells, 17);
    $license_no     = $this->_cell_text($cells, 18);
    $license_expire = $this->_to_sql_date($this->_cell_text($cells, 19));
    $permit_no      = $this->_cell_text($cells, 20);
    $permit_expire  = $this->_to_sql_date($this->_cell_text($cells, 21));
    $land_status    = $this->_cell_text($cells, 22);

    // NOTE(review): get_last_id() is not among the model methods shown next to
    // this code (last_id / last_temp_id are) - confirm it exists.
    $project_id = $this->Kpkt_model->get_last_id();

    // TODO(review): the project fields above are not persisted here; presumably
    // Kpkt_model->add_project_information_temp() should be called - confirm.

    // Skip the first 23 cells (company details plus primary project details).
    // The remainder holds one development row per 12 cells; the row count is
    // not known in advance. MD 30/01/2013
    foreach (array_chunk(array_slice($cells, 23), 12) as $row) {
        $category       = $this->_cell_text($row, 0);
        $type           = $this->_cell_text($row, 1);
        $storey         = $this->_cell_text($row, 2);
        $floorArea      = $this->_cell_text($row, 3);
        $totalArea      = $this->_cell_text($row, 4);
        $units          = $this->_cell_text($row, 5);
        $tcf            = $this->_to_sql_date($this->_cell_text($row, 6));
        $cf             = $this->_to_sql_date($this->_cell_text($row, 7));
        $priceMax       = $this->_cell_text($row, 8);
        $priceStandard  = $this->_cell_text($row, 9);
        $priceMin       = $this->_cell_text($row, 10);
        $progressReport = $this->_cell_text($row, 11);

        // TODO(review): these development fields are not persisted here; presumably
        // Kpkt_model->add_project_development_information_temp() belongs here - confirm.
    }

    // Now sift through the selling information linked from this project.
    foreach ($html2->find('a[href^=LaporanJualRumah.cfm]') as $selling) {
        $this->_scrap_sales($selling->getAttribute('href'), $developer, $project_id);
    }

    // Release the parsed DOM (the "clean the memory" step).
    $html2->clear();
}

/**
 * Parse one sales-report page into rows of 12 cells.
 *
 * @param string $anchor     Relative href of the sales-report page.
 * @param string $company_id Developer key taken from the project page.
 * @param mixed  $project_id Id returned by Kpkt_model::get_last_id().
 * @return void
 */
private function _scrap_sales($anchor, $company_id, $project_id)
{
    // Encode spaces so the href is usable as a URL.
    $anchor = str_replace(' ', '%20', $anchor);

    $html3 = file_get_html("mydesiredwebsite.com/$anchor");
    if (!$html3) {
        return;
    }

    $cells = $this->_clean_cells($html3->find('tr[bgcolor!=#fc6535] td div font'));

    // Each run of 12 cells is one set of selling data.
    foreach (array_chunk($cells, 12) as $value) {
        $roomType       = $this->_cell_text($value, 0);
        $levels         = $this->_cell_text($value, 1);
        $local          = $this->_cell_text($value, 2);
        $chinese        = $this->_cell_text($value, 3);
        $indian         = $this->_cell_text($value, 4);
        $other          = $this->_cell_text($value, 5);
        $foreign        = $this->_cell_text($value, 6);
        $totalSold      = $this->_cell_text($value, 7);
        $totalUnsold    = $this->_cell_text($value, 8);
        $totalPerUnit   = $this->_cell_text($value, 9);
        $approvedUnits  = $this->_cell_text($value, 10);
        $developedUnits = $this->_cell_text($value, 11);

        // TODO(review): these sales fields (with $company_id / $project_id) are not
        // persisted here; presumably Kpkt_model->add_selling_information_temp()
        // belongs here - confirm.
    }

    // Release the parsed DOM (the "clean the memory" step).
    $html3->clear();
}

/**
 * Turn the nodes returned by find() into strings with runs of blanks collapsed
 * to one space and HTML entities removed.
 *
 * @param array $nodes Result of a find() call.
 * @return array Cleaned strings, or an empty array when the regex step fails.
 */
private function _clean_cells($nodes)
{
    // Collapse redundant spaces/tabs.
    $cells = preg_replace("/[[:blank:]]+/", " ", $nodes);
    // Strip HTML special-character entities.
    $cells = preg_replace("/&#?[a-z0-9]{2,8};/i", "", $cells);

    return is_array($cells) ? $cells : array();
}

/**
 * Return one cell as plain text: line breaks and tags removed.
 *
 * @param array $cells Cleaned cell strings.
 * @param int   $index Position of the wanted cell.
 * @return string The text, or '' when the cell does not exist.
 */
private function _cell_text($cells, $index)
{
    if (!isset($cells[$index])) {
        // Short pages have fewer cells than expected; treat missing ones as blank.
        return '';
    }

    return strip_tags(str_replace("\n", "", str_replace("\r", "", $cells[$index])));
}

/**
 * Convert a slash-separated date string to 'Y-m-d'.
 *
 * Slashes are swapped for dashes before parsing. A value strtotime() cannot
 * parse still goes through date() unchanged from the original behaviour.
 *
 * @param string $value Date text as scraped.
 * @return string Date formatted as 'Y-m-d'.
 */
private function _to_sql_date($value)
{
    return date('Y-m-d', strtotime(str_replace('/', '-', $value)));
}


/**
 * Insert one company record into the kpkt_company_temp table.
 *
 * Each argument is stored in the column of the same name.
 *
 * @param string $name
 * @param string $file
 * @param string $roc
 * @param string $address
 * @param string $city
 * @param string $postcode
 * @param string $district
 * @param string $state
 * @param string $telephone
 * @param string $fax
 * @param string $website
 * @param string $last_update
 * @return void
 */
public function insert_company_temp($name, $file, $roc, $address, $city, $postcode, $district, $state, $telephone, $fax, $website, $last_update)
{
    $data = array(
        'file'          =>  $file,
        'name'          =>  $name,
        'roc'           =>  $roc,
        'address'       =>  $address,
        'city'          =>  $city,
        'postcode'      =>  $postcode,
        'district'      =>  $district,
        'state'         =>  $state,
        'telephone'     =>  $telephone,
        'fax'           =>  $fax,
        'website'       =>  $website,
        'last_update'   =>  $last_update,
    );

    $this->db->insert('kpkt_company_temp', $data);
}

/**
 * Fetch the highest id in the kpkt_company table.
 *
 * @return mixed The `id` column of the MAX(id) query result.
 */
public function last_id()
{
    $query = "SELECT MAX(id) AS id FROM kpkt_company";
    $res = $this->db->query($query);

    return $res->row('id');
}


/**
 * Fetch the highest id in the kpkt_company_temp table.
 *
 * @return mixed The `id` column of the MAX(id) query result.
 */
public function last_temp_id()
{
    $query = "SELECT MAX(id) AS id FROM kpkt_company_temp";
    $res = $this->db->query($query);

    return $res->row('id');
}


/**
 * Insert one project record into the kpkt_project_information_temp table.
 *
 * NOTE(review): the parameter list was truncated in the visible code. The
 * names below are the variables the body reads; their order follows the
 * column order of $data and is an assumption - confirm against callers.
 *
 * @param string $developer      Stored as developer_id.
 * @param string $name
 * @param string $file
 * @param string $lot_no
 * @param string $state
 * @param string $housing_no
 * @param string $bank_name
 * @param string $license_no     Stored as license.
 * @param string $license_expire
 * @param string $permit_no
 * @param string $permit_expire
 * @param string $land_status
 * @return void
 */
public function add_project_information_temp($developer, $name, $file, $lot_no, $state, $housing_no, $bank_name, $license_no, $license_expire, $permit_no, $permit_expire, $land_status)
{
    $data = array(
        'developer_id'      =>  $developer,
        'name'              =>  $name,
        'file'              =>  $file,
        'lot_no'            =>  $lot_no,
        'state'             =>  $state,
        'housing_no'        =>  $housing_no,
        'bank_name'         =>  $bank_name,
        'license'           =>  $license_no,
        'license_expire'    =>  $license_expire,
        'permit_no'         =>  $permit_no,
        'permit_expire'     =>  $permit_expire,
        'land_status'       =>  $land_status,
    );

    $this->db->insert('kpkt_project_information_temp', $data);
}

/**
 * Insert one development row into the
 * kpkt_project_development_information_temp table.
 *
 * NOTE(review): the parameter list was truncated in the visible code. The
 * names below are the variables the body reads; their order follows the
 * column order of $data and is an assumption - confirm against callers.
 *
 * @param string $developer      Stored as developer_id.
 * @param mixed  $project_id
 * @param string $category       Stored as house_category.
 * @param string $type           Stored as house_type.
 * @param string $storey         Stored as levels.
 * @param string $floorArea      Stored as floor_area.
 * @param string $totalArea      Stored as total_area.
 * @param string $units
 * @param string $tcf
 * @param string $cf
 * @param string $priceMax       Stored as price_max.
 * @param string $priceStandard  Stored as price_standard.
 * @param string $priceMin       Stored as price_min.
 * @param string $progressReport Stored as progress_report.
 * @return void
 */
public function add_project_development_information_temp($developer, $project_id, $category, $type, $storey, $floorArea, $totalArea, $units, $tcf, $cf, $priceMax, $priceStandard, $priceMin, $progressReport)
{
    $data = array(
        'developer_id'      =>  $developer,
        'project_id'        =>  $project_id,
        'house_category'    =>  $category,
        'house_type'        =>  $type,
        'levels'            =>  $storey,
        'floor_area'        =>  $floorArea,
        'total_area'        =>  $totalArea,
        'units'             =>  $units,
        'tcf'               =>  $tcf,
        'cf'                =>  $cf,
        'price_max'         =>  $priceMax,
        'price_standard'    =>  $priceStandard,
        'price_min'         =>  $priceMin,
        'progress_report'   =>  $progressReport,
    );

    $this->db->insert('kpkt_project_development_information_temp', $data);
}

/**
 * Insert one sales row into the kpkt_selling_information_temp table.
 *
 * NOTE(review): the parameter list was truncated in the visible code. The
 * names below are the variables the body reads; their order follows the
 * column order of $data and is an assumption - confirm against callers.
 *
 * @param string $company_id     Stored as developer_id.
 * @param mixed  $project_id
 * @param string $roomType       Stored as house_type.
 * @param string $levels
 * @param string $local          Stored as bumi.
 * @param string $chinese
 * @param string $indian
 * @param string $other
 * @param string $foreign        Stored as foreigner.
 * @param string $totalSold      Stored as units_sold.
 * @param string $totalUnsold    Stored as units_unsold.
 * @param string $totalPerUnit   Stored as price_per_unit.
 * @param string $approvedUnits  Stored as approved_units.
 * @param string $developedUnits Stored as developed_units.
 * @return void
 */
public function add_selling_information_temp($company_id, $project_id, $roomType, $levels, $local, $chinese, $indian, $other, $foreign, $totalSold, $totalUnsold, $totalPerUnit, $approvedUnits, $developedUnits)
{
    $data = array(
        'developer_id'  =>  $company_id,
        'project_id'    =>  $project_id,
        'house_type'    =>  $roomType,
        'levels'        =>  $levels,
        'bumi'          =>  $local,
        'chinese'       =>  $chinese,
        'indian'        =>  $indian,
        'other'         =>  $other,
        'foreigner'     =>  $foreign,
        'units_sold'    =>  $totalSold,
        'units_unsold'  =>  $totalUnsold,
        'price_per_unit'=>  $totalPerUnit,
        'approved_units'=>  $approvedUnits,
        'developed_units'=> $developedUnits,
    );

    $this->db->insert('kpkt_selling_information_temp', $data);
}

所以再次澄清一下:一切都运行得非常顺利,没有任何问题,只是 INSERT 语句可以说一直在落后。我怎样才能对 function start() 中的 while 循环进行节流?谢谢。


1 回答 1


您正在使用 usleep,它的暂停时间是以微秒为单位的;参见 http://php.net/manual/en/function.usleep.php


也许可以改用 sleep(1),或者把 usleep 的参数改为 100000(即 0.1 秒)。

于 2013-03-28T02:06:30.687 回答