我是 RETS 导入的新手,所以我开发了这个脚本:
<?php
use Aws\Credentials\Credentials;
use Aws\S3\S3Client;
date_default_timezone_set('America/New_York');
/* --------------------------------------------------------
------- GET THE ARRAY OF SELECTED FIELDS TO INDEX FROM MLS ---
--------------------------------------------------------*/
require_once ('retsfields.php');
$filteredretsfields = array_filter( $retsfields );
/* --------------------------------------------------------
------- DATABASE ------------
--------------------------------------------------------*/
$servername = "xxx";
$username = "xxx";
$password = "xxx";
$dbname = "xxx";
// Create connection
$conn = new mysqli($servername, $username, $password, $dbname);
mysqli_query($conn, "SET SESSION sql_mode = ''");
// Check connection
if ($conn->connect_error) {
die("Connection failed: " . $conn->connect_error);
}
$startTime = date('Y-m-d H:i:s');
/* ----------------------------------------------------------------------*/
/* ------------ IS A PREVIOUS FEED ALREADY RUNNING? ----------------*/
/* -------------------------------------------------------------------*/
$getFeedRunning = "SELECT * FROM feedsettings WHERE name = 'feedrunning';";
$rowFeedRunning = $conn->query( $getFeedRunning );
while($FeedRunning = $rowFeedRunning->fetch_assoc()) {
$feedRunningValue = $FeedRunning['val'];
}
if ($feedRunningValue == '0') {
/* ----------------------------------------------------------------------*/
/* ---------------- UPDATE FEEDRUNNING -------------------------*/
/* -----------------------------------------------------------------------*/
$updateFeedRunning = "UPDATE feedsettings SET val='1', updated_at='".$startTime."' WHERE name='feedrunning';";
if ($conn->query($updateFeedRunning) === TRUE) {
//echo "Record updated successfully";
} else {
//echo "Error updating record: " . $conn->error;
}
/* ---------------------------------------------------------------------*/
/* ---------------- ADD TO LOG TABLE -------------------------- */
/* ------------------------------------------------------------------------*/
$sqlLogs = "INSERT INTO importlogs (totallistings, starttime, endtime, totalimages, lastindex, newlastindex, comments) VALUES ('', '$startTime', '', '', '', '', 'Feed started!')";
if ( $conn->query( $sqlLogs ) === true ) {
//echo "New record created successfully";
} else {
//echo "Error: " . $sql . "<br>" . $conn->error;
}
/* --------------------------------------------------------
------- RETS APIS ------------
--------------------------------------------------------*/
require_once("vendor/autoload.php");
$config = new \PHRETS\Configuration;
$config->setLoginUrl('xxx')
->setUsername('xxx')
->setPassword('xxx')
->setUserAgent('Bright RETS Application/1.0')
->setRetsVersion('1.7.2');
$rets = new \PHRETS\Session($config);
$connect = $rets->Login();
/* -----------------------------------------------------------------*/
/* ---------------- AMAZON S3 ---------------------------*/
/* -----------------------------------------------------------------*/
$credentials = new Aws\Credentials\Credentials('xxx', 'xxx');
$s3 = new Aws\S3\S3Client([
'region' => 'us-east-2',
'version' => 'latest',
'credentials' => $credentials,
//'debug' => true
]);
$s3->registerStreamWrapper();
/* ----------------------------------------------------------------------*/
/* LASTINDEX: WHAT WAS THE LAST IMPORTED DATE: EG: 2019-06-24T08:45:45 */
/* -------------------------------------------------------------------*/
$getLastIndex = "SELECT * FROM feedsettings WHERE name = 'lastindex';";
$rowLastIndex = $conn->query( $getLastIndex );
while($lastIndex = $rowLastIndex->fetch_assoc()) {
$lastIndexValue = $lastIndex['val'];
}
$newLastIndex = date('Y-m-d').'T'.date('H:i:s');
/* ----------------------------------------------------------------------*/
/* --- FETCH NUMBER OF LISTINGS: MAP LASTINDEX TO MODIFICATION DATE ----*/
/* -------------------------------------------------------------------*/
$totalProperties = $rets->Search('Property', 'ALL', '(ModificationTimestamp='.$lastIndexValue.'+)', ['Limit' => 1]);
$totalPropertiesCount = $totalProperties->getTotalResultsCount();
$numberOfOffsetPages = ceil($totalPropertiesCount / 1000);
$iCount = 0;
$numberOfPhotos = 0;
for ($offset = 1; $offset <= $totalPropertiesCount; $offset=($iCount*1000)+1) {
/* ------------ FETCH RECORDS PER 1000 LISTINGS --------------- */
$results = $rets->Search('Property', 'ALL', '(ModificationTimestamp='.$lastIndexValue.'+)', [
'Limit' => 1000,
'Offset' => $offset,
'StandardNames' => 0, // give system names
]);
foreach ( $results as $r ) {
$retsdata = array();
foreach ( $filteredretsfields as $key => $value ) {
$retsdata[ $key ] = htmlspecialchars( $r[ $value ], ENT_QUOTES );
}
$date = date( 'Y-m-d H:i:s' );
$retsdata['created_at'] = $date;
$retsdata['updated_at'] = $date;
$columns = implode( ",", array_keys( $retsdata ) );
$values = "'" . implode( "','", $retsdata ) . "'";
$searchsql = "SELECT * FROM properties WHERE ListingId = '" . $r['ListingId'] . "'";
$checkExistingListings = $conn->query( $searchsql );
if ( $checkExistingListings->num_rows > 0 ) {
//DELETE EXISTING RECORDS
$conn->query( 'DELETE FROM properties WHERE ListingId = "' . $r['ListingId'] . '";' );
$conn->query( 'DELETE FROM images WHERE ListingId = "' . $r['ListingId'] . '";' );
//TODO DELETE THE EXISTING IMAGES
//INSERT NEW RECORD
$sql = "INSERT INTO properties ($columns) VALUES ($values)";
} else {
//INSERT NEW RECORD
$sql = "INSERT INTO properties ($columns) VALUES ($values)";
}
if ( $conn->query( $sql ) === true ) {
//echo "New record created successfully -> ";
//echo $sql;
} else {
//echo "Error: " . $sql . "<br>" . $conn->error;
}
/* -------------------------------------------------------------------*/
/* ------------ FETCH IMAGES ----------------*/
/* -------------------------------------------------------------------*/
$numberOfPhotos = $numberOfPhotos + $r['TotalPhotos'];
$photos = '';
$photoNumber = 0;
if ($r['TotalPhotos'] != 0) {
for ($photoNumber = 1; $photoNumber <= $r['TotalPhotos']; $photoNumber++) {
/* ----- FETCH PHOTO PER PHOTO TO AVOID MEMORY ISSUES ----- */
/* ---- FETCH ALL PHOTOS IN ONE GETOBJECT() CRASHED THE SERVER ---- */
$photos = $rets->GetObject("Property", "Thumbnail", $r['ListingKey'], $photoNumber);
for ($i = 0; $i < 4096; $i++) {
preg_replace('/\d/', '', 1);
}
foreach ($photos as $photo) {
$listing = $r['ListingId'];
$number = $photo->getObjectId();
if (!$photo->isError()) {
$key = $listing.'/image-'.$listing.'-'.$number.'.jpg';
// Send a PutObject request and get the result object.
$context = stream_context_create(array(
's3' => array(
'ACL' => 'public-read'
)
));
/* -- COPY THE PHOTO TO S3 -- */
file_put_contents('s3://my-bucket-url/'.$key, $photo->getContent(), 0, $context);
$sqlImages = "INSERT INTO images (ListingId, storedUrl) VALUES ('$listing', '$key')";
if ( $conn->query( $sqlImages ) === true ) {
//echo "New record created successfully -> ";
//echo $sql;
} else {
//echo "Error: " . $sqlImages . "<br>" . $conn->error;
}
}
else {
//echo 'error';
}
}
}
unset($photos);
}
/* ----- CLEAN VARIABLES --------------------------------*/
unset($columns, $values, $retsdata);
}
/* ----- CLEAN VARIABLES --------------------------------*/
unset($results);
$iCount++;
}
$endTime = date('Y-m-d H:i:s');
/* ----------------------------------------------------------------------*/
/* ---------------- UPDATE LASTINDEX -------------------------*/
/* -----------------------------------------------------------------------*/
$updateLastIndex = "UPDATE feedsettings SET val='".$newLastIndex."', updated_at='".$endTime."' WHERE name='lastindex';";
if ($conn->query($updateLastIndex) === TRUE) {
//echo "Record updated successfully";
} else {
//echo "Error updating record: " . $conn->error;
}
/* ----------------------------------------------------------------------*/
/* ---------------- UPDATE FEEDRUNNING -------------------------*/
/* -----------------------------------------------------------------------*/
$updateFeedRunning = "UPDATE feedsettings SET val='0', updated_at='".$endTime."' WHERE name='feedrunning';";
if ($conn->query($updateFeedRunning) === TRUE) {
//echo "Record updated successfully";
} else {
//echo "Error updating record: " . $conn->error;
}
/* ---------------------------------------------------------------------*/
/* ---------------- ADD TO LOG TABLE -------------------------- */
/* ------------------------------------------------------------------------*/
$sqlLogs = "INSERT INTO importlogs (totallistings, starttime, endtime, totalimages, lastindex, newlastindex) VALUES ($totalPropertiesCount, '$startTime', '$endTime', $numberOfPhotos, '$lastIndexValue', '$newLastIndex')";
if ( $conn->query( $sqlLogs ) === true ) {
//echo "New record created successfully -> ";
//echo $sql;
} else {
//echo "Error: " . $sql . "<br>" . $conn->error;
}
} else {
$endTime = date('Y-m-d H:i:s');
/* ---------------------------------------------------------------------*/
/* ---------------- ADD TO LOG TABLE -------------------------- */
/* ------------------------------------------------------------------------*/
$sqlLogs = "INSERT INTO importlogs (totallistings, starttime, endtime, totalimages, lastindex, newlastindex, comments) VALUES ('', '$startTime', '$endTime', '', '', '', 'Feed already running!')";
if ( $conn->query( $sqlLogs ) === true ) {
//echo "New record created successfully -> ";
//echo $sql;
} else {
//echo "Error: " . $sql . "<br>" . $conn->error;
}
}
$conn->close();
原则上代码运行良好,虽然我有一些内存问题,请参阅代码中的注释。我主要关心的是如何改进、加快导入过程。我需要索引的 MLS 是 Bright MLS。Bright 要求我们将所有数据(包括图像)存储在我们的服务器上。所以我在过去 18 小时内运行了这段代码,它导入了大约 8000 个列表和 200.000 张图片,然后由于网关错误而崩溃。
我知道这种大规模导入只需要一次(在开始时),之后我可以每 x 小时进行一次部分更新。
所以我有两个问题:
我怎样才能让它更稳定,让我更容易从这些崩溃中恢复,因为这意味着我每次都需要从头开始。
我怎样才能加快导入速度,因为以这种速度,导入所有列表需要 11 天。也许我不需要保留5年的历史
我正在考虑导入所有没有图像的内容,存储图像 url,以便索引完整的 MLS。在我运行一个进程来一张一张地获取这些图像之后(这需要 11 天)。关于将图像导入推送到 Amazon SQS 队列的任何想法(没有经验)。
非常感谢