我在将 Disqus 评论导入我的新网站时遇到了同样的问题。最近我想出了一种方法来解析这些评论并将其插入 WordPress 数据库,全部用 PHP 编写,请参考这个链接,欢迎任何反馈。
代码如下:
// Import the comments of a Disqus XML export into the WordPress
// `wp_comments` table and report how long each insert took.
//
// NOTE: the mysql_* functions used throughout this script belong to the
// original MySQL extension, which was removed in PHP 7.0. The script therefore
// needs PHP 5.x, or a port to mysqli / PDO.

// start to count the timer
$start = microtime( true );
$max_duration = '';

// get content of file and parse the xml
$xml = simplexml_load_file( 'yourmxlfilesource.xml' );
if ( $xml === false ) {
    // nothing below can work without a parsed document
    trigger_error( 'Unable to load or parse the XML file', E_USER_ERROR );
}

// initiate database connection
$database_info[ 'hostname' ] = ""; // database hostname
$database_info[ 'database' ] = ""; // database name
$database_info[ 'username' ] = ""; // database username
$database_info[ 'password' ] = ""; // database password
$database_connect = mysql_pconnect( $database_info[ 'hostname' ], $database_info[ 'username' ], $database_info[ 'password' ] ) or trigger_error( mysql_error(), E_USER_ERROR );
mysql_select_db( $database_info[ 'database' ], $database_connect ) or trigger_error( mysql_error( $database_connect ), E_USER_ERROR );

$i = 0;
// per-comment timings; initialised so the statistics below are safe
// even when the export contains no comments
$duration = array( );

// get all the comment from xml file
$comments = get_post();
if ( !is_array( $comments ) ) {
    $comments = array( );
}

// get all the post title array from xml file
$post_title = get_post_title_array();

$comment_result = array( );
$temp = array( );

// create loop to convert from xml comment into wordpress-format comment
foreach ( $comments as $comment ) {
    $start_sub = microtime( true );

    // check if comment is spam, deleted or approved
    if ( (string) $comment->isSpam === 'true' ) {
        $comment_approved = 'spam';
    } else if ( (string) $comment->isDeleted === 'true' ) {
        $comment_approved = 'trash';
    } else {
        $comment_approved = 1;
    }

    // NOTE(review): createdAt is written to both the local and the GMT
    // column unchanged - confirm whether a timezone conversion is wanted.
    $comment_date = mysql_real_escape_string( sanitize_date( $comment->createdAt ), $database_connect );

    // insert_comment() puts every value between double quotes verbatim, so
    // every value taken from the XML is escaped here. Tags are stripped
    // BEFORE escaping so the escaping is the last transformation applied.
    $comment_result = array(
        'comment_post_ID'      => (int) get_post_id( $comment->thread->attributes( 'dsq', TRUE )->id ),
        'comment_author'       => mysql_real_escape_string( (string) $comment->author->name, $database_connect ),
        'comment_author_email' => mysql_real_escape_string( (string) $comment->author->email, $database_connect ),
        'comment_author_url'   => '',
        'comment_author_IP'    => mysql_real_escape_string( (string) $comment->ipAddress, $database_connect ),
        'comment_date'         => $comment_date,
        'comment_date_gmt'     => $comment_date,
        'comment_content'      => mysql_real_escape_string( strip_tags( (string) $comment->message, '<br><img><a>' ), $database_connect ),
        'comment_karma'        => 1,
        'comment_approved'     => $comment_approved,
        'comment_agent'        => '',
        'comment_type'         => '',
        // 0 instead of '' so the value is a valid integer even when MySQL
        // runs in strict mode
        'comment_parent'       => 0,
        'user_id'              => 0,
    );

    // store the wordpress format comment into temporary variable
    $temp[ $i ] = $comment_result;

    // insert the wordpress format comment into wp database
    insert_comment( $temp[ $i ] );

    $duration[ $i ] = microtime( true ) - $start_sub;
    $i++;
}

// max()/min() and the average are undefined for an empty list
if ( count( $duration ) > 0 ) {
    echo 'max duration : ' . max( $duration ) . '<br/>';
    echo 'min duration : ' . min( $duration ) . '<br/>';
    echo 'average duration : ' . ( array_sum( $duration ) / count( $duration ) ) . '<br/>';
}

// show the total duration of process
echo 'total duration : ' . ( microtime( true ) - $start );
///////// define function here
/**
 * Insert one comment row into the `wp_comments` table.
 *
 * Uses the global $database_connect link and terminates the script with the
 * MySQL error message if the query fails.
 *
 * Every value is placed between double quotes verbatim; nothing is escaped
 * here, so the caller must pass values that are already escaped for MySQL.
 *
 * @param array $comment column name => value
 */
function insert_comment( $comment )
{
    global $database_connect;

    // build the column list and the matching quoted value list
    $columns = array( );
    $quoted  = array( );
    foreach ( $comment as $key => $value ) {
        $columns[] = '`' . $key . '`';
        $quoted[]  = '"' . $value . '"';
    }
    $field  = implode( ',', $columns );
    $values = implode( ',', $quoted );

    // insert the comment into the database
    $query = "INSERT INTO `wp_comments` ($field) VALUES ($values)";
    mysql_query( $query, $database_connect ) or die( mysql_error() );
}
/**
 * Turn a timestamp such as "2012-08-01T10:20:30Z" into "2012-08-01 10:20:30".
 *
 * Every "T" and "Z" in the input is replaced by a space, then surrounding
 * whitespace is removed so the trailing "Z" does not leave a dangling space.
 *
 * @param mixed $date string, or any value that casts to string
 * @return string
 */
function sanitize_date( $date )
{
    $date = str_replace( array( 'T', 'Z' ), ' ', (string) $date );
    return trim( $date );
}
/**
 * Resolve a Disqus thread id to the ID of a row in `wp_posts`.
 *
 * The thread's `title` value is reduced to its last "/"-separated segment,
 * "-" is replaced by a space and ".html" is removed. The closest entry of the
 * global $post_title list is then looked up by exact `post_title` match.
 *
 * @param mixed $thread Disqus thread id
 * @return mixed the matching `ID` column value, or null when no post matches
 */
function get_post_id( $thread )
{
    global $post_title, $database_connect;

    // get the title of the post; the cast makes a missing thread an empty string
    $thread_title = (string) find_thread( 'id', $thread, 'title' );

    // keep only the last path segment and turn the slug into words
    $segments     = explode( '/', $thread_title );
    $thread_title = $segments[ count( $segments ) - 1 ];
    $thread_title = str_replace( '-', ' ', $thread_title );
    $thread_title = str_replace( '.html', '', $thread_title );

    $post_title_closest = get_closest_post_title( $thread_title, $post_title );
    if ( $post_title_closest === null ) {
        // no known post titles to match against
        return null;
    }

    // get the wordpress post id from the title of the post; the title is
    // escaped because titles may contain quotes
    $query = "SELECT `ID` FROM `wp_posts` WHERE `post_title` = '"
        . mysql_real_escape_string( $post_title_closest, $database_connect )
        . "' LIMIT 1";
    $query_result = mysql_query( $query, $database_connect ) or die( mysql_error() );

    $query_result_row = mysql_fetch_assoc( $query_result );
    if ( !$query_result_row ) {
        return null;
    }
    return $query_result_row[ 'ID' ];
}
/**
 * Return the entry of $words with the smallest Levenshtein distance to $input.
 *
 * An exact match (distance 0) is returned immediately. When several entries
 * share the smallest distance, the last one of them wins.
 *
 * @param string $input text to match
 * @param array  $words candidate strings
 * @return mixed the closest candidate, or null when $words is empty
 */
function get_closest_post_title( $input, $words )
{
    // null when there is nothing to compare against
    $closest = null;
    // no shortest distance found, yet
    $shortest = -1;

    foreach ( $words as $word ) {
        $lev = levenshtein( (string) $input, (string) $word );

        // before PHP 8.0 levenshtein() returned -1 when an argument was
        // longer than 255 characters; that is not a real distance
        if ( $lev < 0 ) {
            continue;
        }

        // exact match: nothing can be closer
        if ( $lev == 0 ) {
            return $word;
        }

        // first candidate, or at least as close as the best so far
        if ( $shortest < 0 || $lev <= $shortest ) {
            $closest  = $word;
            $shortest = $lev;
        }
    }

    return $closest;
}
/**
 * Fetch every distinct `post_title` from `wp_posts`.
 *
 * Uses the global $database_connect link and terminates the script with the
 * MySQL error message if the query fails.
 *
 * @return array list of titles; empty when the table has no rows
 */
function get_post_title_array( )
{
    global $database_connect;

    $query = "SELECT DISTINCT(`post_title`) FROM `wp_posts`";
    $query_result = mysql_query( $query, $database_connect ) or die( mysql_error() );

    // a plain while loop, so an empty result set yields an empty list
    $result = array( );
    while ( $query_result_row = mysql_fetch_assoc( $query_result ) ) {
        $result[] = $query_result_row[ 'post_title' ];
    }
    return $result;
}
/**
 * Look up a child of the first top-level element whose `dsq:id` matches.
 *
 * Scans the direct children of the global $xml document and compares their
 * `id` attribute (namespace prefix `dsq`) with $source_value as integers.
 *
 * NOTE(review): every top-level child is examined, not only thread elements;
 * confirm ids cannot collide across element types.
 *
 * @param mixed  $category        not used; the match is always on the id
 * @param mixed  $source_value    id to look for
 * @param string $return_category name of the child element to return
 * @return mixed the requested child element, or null when no id matches
 */
function find_thread( $category, $source_value, $return_category )
{
    global $xml;

    // the wanted id does not change while scanning
    $wanted_id = (int) $source_value;

    foreach ( $xml->children() as $row ) {
        if ( (int) $row->attributes( 'dsq', TRUE )->id == $wanted_id ) {
            return $row->$return_category;
        }
    }
    return null;
}
/**
 * Collect every direct child of the global $xml document named "post".
 *
 * @return array list of the matching elements in document order; empty when
 *               there are none
 */
function get_post( )
{
    global $xml;

    // initialised up front so an export without posts yields an empty list
    $result = array( );
    foreach ( $xml->children() as $key => $value ) {
        if ( $key === 'post' ) {
            $result[] = $value;
        }
    }
    return $result;
}