-1

我将大量数据从 xml 文件保存到 sql 数据库中。前段时间 XML 文件的结构发生了变化。现在有一个语言“de”的数据记录和一个语言“en”的记录。由于结构发生了变化,因此存在一些双重返回的错误。

这是我的新结构:

<?xml version="1.0" encoding="UTF-8"?>
<Results>
<SearchResult Language="de"><RouteID>28338</RouteID>..............
<SearchResult Language="en"><RouteID>28338</RouteID>..............
<SearchResult Language="de"><RouteID>28396</RouteID>..............
<SearchResult Language="en"><RouteID>28396</RouteID>..............

我只想通过我的 php-xml-parser 从德语记录中读取。非常感谢您的帮助!

我的代码:

<?php    
defined('_JEXEC') or die('Restricted access');
/**
* Parser Class
*/
class Parser
{
var $dbo;
var $partner;

function __construct($dbo, $parser, $id) {
    $this->dbo = $dbo;
    $this->partner = new stdClass();
    $this->partner->name = $parser;
    $this->partner->id = $id;
}

function go() {
    error_reporting(E_ALL);

    // creating parser object
    $xml_parser = xml_parser_create('UTF-8');

    // set encoding to UTF-8
    xml_parser_set_option($xml_parser,
            XML_OPTION_TARGET_ENCODING,
            'UTF-8');

    // initializing the handler class
    $handler = new XMLHandlerClass(
            $this->dbo,
            $this->partner);

    // setting up the handlers
    xml_set_element_handler($xml_parser,
            array($handler, 'startElement'),
            array($handler, 'endElement'));
    xml_set_character_data_handler($xml_parser,
            array($handler, 'contents'));

    // path to the xml file
    $file = JPATH_SITE.DS.'tmp'.DS.'routes.xml';

    // open the file to read
    if (!($fp = fopen($file, "r"))) {
        die('konnte xml nicht öffnen');
        return false;
    }

    // read line after line from the xml file
    $data = null;
    while ($data = fgets($fp, 100000)) {
        if (!xml_parse($xml_parser, $data, feof($fp))) {
            die(sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($xml_parser)),
                xml_get_current_line_number($xml_parser)));
        }
    }

    // free the xml parser
    xml_parser_free($xml_parser);

    // every thing fine?
    return true;
    }
    }

 /**
* XML Handler Class
*/
class XMLHandlerClass
{
var $dbo;
var $partner;

var $element;

var $saveCounter = 0;
var $clearCounter = 0;

var $cabintype;
var $counterPrice = 0;

var $counterRoute = 0;

var $item;

var $api;

function __construct($dbo, $partner) {

    // save params
    $this->dbo = $dbo;
    $this->partner = $partner;

    // include api
    include_once JPATH_COMPONENT_ADMINISTRATOR
            .DS.'api'.DS.'KreuzfahrtenAPI.php';

    // initialize api
    $this->api = new KreuzfahrtenAPI($this->dbo,
            $this->partner->name,
            $this->partner->id);
}

function startElement($parser, $name, $attrs) {
    // don't give up
    set_time_limit(120);

    // get the name
    $this->element = strtolower($name);

    // bevor das speichern der tripps beginnt,
    // aber eigentlich schon die routen informationen da sind
    // soll die dauer noch um 1 gekürzt werden
    if ($this->element == "listoftrips") {
        $this->item['dauer'] -= 1;
    }
}

function endElement($parser, $name) {

    // don't give up
    set_time_limit(120);

    // get the name
    $name = strtolower($name);

    // counter hochzählen
    switch ($name) {
        case "port":
            $this->counterRoute++;
            break;
        case "cabin":
            $this->counterPrice++;
            break;
        case "trip":
            $this->saveCounter++;
            $this->save();
            break;
        case "route":
            $this->clearCounter++;
            $this->clear();
            break;
    }
}

function contents($parser, $data) {

    // remove quots
    $data = $this->reEn($data);

    // assign the $data by the $this->element
    switch ($this->element) {
        case "routeid":
            $this->item['routeid'] .= $data;
            break;
        case "name":
            $this->item['titel'] .= $data;
            break;
        case "duration":
            $this->item['dauer'] .= $data;
            break;
        case "port":
            if ($this->counterRoute != 0 && $this->counterRoute != 1) {
                $this->item['route'][$this->counterRoute-2] .= $data;
            }
            break;
        case "priceoverview":
            if ($data == "Einzel") break;
            switch ($data) {
                case "bestinsidecabinprice":
                    $this->cabintype = "Innenkabine";
                    break;
                case "bestoutsidecabinprice":
                    $this->cabintype = "Außenkabine";
                    break;
                case "bestsuitecabinprice":
                    $this->cabintype = "Suite";
                    break;
                case "bestbalconycabinprice":
                    $this->cabintype = "Balkonkabine";
                    break;
                default:
                    die('Kein passender Kabinentyp '.$data);
                    break;
            }
            break;
        case "bestprice":
            $cabinetype = $this->cabintype;
            $this->item['prices'][$cabinetype][$this->counterPrice] .= str_replace(',', '.', $data);
            unset($cabinetype);
            break;
        case "tripbegins":
            $this->item['beginn'] .= $data;
            break;
        case "tripends":
            $this->item['ende'] .= $data;
            break;
        case "destinationname":
            $this->item['zielgebiet'] .= $data;
            break;
        case "shipname":
            $this->item['schiff'] .= $data;
            break;
    }
}

function reEn($subject) {
    $toDel = array (
        '"',
        "'"
    );
    $toRe = array(
        '',
        ''
    );
    $subject = str_replace($toDel, $toRe, $subject);
    return $subject;
}

function clear($error = false) {
    if ($error) {
        $title = "ERROR:Clear";
        $description = "ITEM:\n"
            .$this->api->implodeItem($this->item);
        $this->api->setErrorMsg($title,$description);
    }

    $this->element = null;
    $this->item = null;
    $this->cabintype = null;
    $this->counterPrice = 0;
    $this->counterRoute = 0;
}

function save() {
    // neues zeitlimit setzen
    set_time_limit(120);

    // prüfe ob alle daten vorhanden sind
    if (empty($this->item['titel'])
            || empty($this->item['routeid'])
            || empty($this->item['dauer'])
            || !is_array($this->item['route'])
            || !is_array($this->item['prices'])
            || empty($this->item['beginn'])
            || empty($this->item['ende'])
            || empty($this->item['zielgebiet'])
            || empty($this->item['schiff'])
                    ) {
        $this->clear(TRUE);
        return false;
    } else {
        // daten für api vorbereiten

        // -- url
        $this->item['url'] = "www.domain.com/?fuseaction=product.showroute="
                .$this->item['routeid']
                ."&ID=XXX";
        // -- datum
        $this->item['beginn'] = $this->api->convertDate(
                explode('.', $this->item['beginn'])
        );
        $this->item['ende'] = $this->api->convertDate(
                explode('.', $this->item['ende'])
        );
        // reise mit der api speichern
        $this->api->save($this->item);

        // nicht mehr nötige daten löschen
        unset($this->item['beginn'],
                $this->item['ende'],
                $this->item['prices']);
    }
}
}
?>

错误:在某些记录中,我得到 3 到 4 个不同的名称

schiff = CamargueModiglianiModigliani
zielgebiet = RhoneSaôneRheinRhine
routeid = 282992829928338

我认为语言属性必须是这之间的联系......

4

2 回答 2

1

为什么不只阅读每条记录,只考虑那些有'Language="de"' 的记录呢?

编辑 :

在保存方法中,在“如果为空”部分添加以下条件:

|| $this->item['Language'] != "de"

这将使非德语的记录被清除,就好像它们不适合一样。

于 2013-06-13T21:13:15.823 回答
0

使用simplexmlandxpath来选择只删除条目:

$xml = simplexml_load_file("datei.xml");
$de = $xml->xpath("//SearchResult[@Language = 'de']");

现在,$de包含一个包含所有节点的数组Language = "de"。您可以迭代并将值存储到您的数据库中。

如果您展示了更完整和有效的 XML 片段,我们可以为您提供更具体的支持。

于 2013-06-13T21:58:12.623 回答