我正在尝试将 xml 转储从 trac db 转换为 mediawiki 格式。我正在使用以下 perl 脚本来执行转换:
#!/usr/bin/perl
# trac2mw
# Converts a trac wiki to MediaWiki format
# Input:
# MySQL XML export of the Trac database
# Output:
# MediaWiki XML, suitable for feeding to Special:Import
#
# by Matthew Sachs <matthewg@zevils.com>
#
#This work is hereby released into the Public Domain. To view a copy
#of the public domain dedication, visit:
# http://creativecommons.org/licenses/publicdomain/
# Name of your MediaWiki
use constant WIKI_NAME => "MyWiki";
# Front page of your MediaWiki
use constant WIKI_BASE => "http://example.com/Wiki/Main_Page";
# To whitelist pages, list them here, one per line, e.g.
# @includePages = qw(
# FooBar
# FooBaz
# );
#
# If any pages are listed, only pages on the list will be included.
my @includePages = qw(
);
# Pages can be renamed.
# Trac page name => MediaWiki page name
my %renamePages = (WikiStart => "Main Page");
# Trac -> MediaWiki author map.
# TracUser => [MediaWiki username, MediaWiki user ID]
# Example:
# my %authors = (
# matthew => ["Matthew", 1],
# liz => ["Liz", 2]
# );
my %authors = ();
# END CONFIGURATION
use strict;
use warnings;
use XML::LibXML;
use POSIX qw(strftime);
our $VERSION = "0.1";
sub fixWikiList {
my($in) = @_;
my $ret = "";
my @rows = split(/\n/, $in);
my @inlists = ();
foreach my $row (@rows) {
$row =~ s/^ ( *)([#*])//;
my($space, $toplist) = ($1, $2);
while(length($space) < @inlists) {
pop @inlists;
}
push @inlists, $toplist;
$ret .= join("", @inlists) . $row . "\n";
}
return $ret;
}
sub fixWikiTable {
my($in) = @_;
my $ret = "{|\n";
my @rows = split(/\n/, $in);
foreach my $row (@rows) {
$ret .= "|-\n";
$row =~ s/^\|\|//;
$row =~ s/\|\|$//;
foreach my $col (split(/\|\|/, $row)) {
$ret .= "| $col\n";
}
}
$ret =~ s/\n\| -$/\n/;
$ret .= "|}\n";
return $ret;
}
sub fixWikiText {
my($text) = @_;
# [wiki:Foo] -> [Foo]
$text =~ s/\[wiki:/[/g;
# [Link] -> [[Link]]
$text =~ s/\[([a-z0-9 _-]+)\]/[[$1]]/ig;
# <sp><sp>* -> **
$text =~ s/((?:^(?: +)(?:[*#])(?:.*)$(?:\n?))+)/fixWikiList($1)/gem;
# CamelCase -> [[CamelCase]]
$text =~ s^ ([A-Z][a-z0-9]+[A-Z][a-z0-9]+[a-zA-Z0-9]*)\b^ [[$1]]^g;
# Table syntax
$text =~ s/((?:^\|\|.*\|\|$(?:\n?))+)/fixWikiTable($1)/gem;
return $text;
}
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file(shift);
my $root = $doc->documentElement();
my @wikiPages = $root->findnodes("//table_data[\@name='wiki']/row");
my %pages;
foreach my $node (@wikiPages) {
my $name = $node->find("field[\@name='name']")->[0]->textContent();
next unless grep { $_ eq $name } @includePages;
$name = $renamePages{$name} if $renamePages{$name};
my $text = $node->find("field[\@name='text']")->[0]->textContent();
my $author = $node->find("field[\@name='author']")->[0]->textContent();
my $time = $node->find("field[\@name='time']")->[0]->textContent();
my $version = $node->find("field[\@name='version']")->[0]->textContent();
$version--;
$pages{$name} ||= [];
$pages{$name}->[$version] = {
text => $text,
author => $author,
time => $time,
};
}
my $outDoc = XML::LibXML::Document->new();
my $mw = XML::LibXML::Element->new("mediawiki");
$outDoc->setDocumentElement($mw);
use constant MW_NS => "http://www.mediawiki.org/xml/export-0.3/";
use constant XSI_NS => "http://www.w3.org/2001/XMLSchema-instance";
$mw->setNamespace(XSI_NS, "xsi");
$mw->setNamespace(MW_NS);
$mw->setAttributeNS(XSI_NS, "schemaLocation", "http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd");
$mw->setAttributeNS(MW_NS, "version", "0.3");
$mw->setAttribute("xml:lang", "en");
my $siteinfo = XML::LibXML::Element->new("siteinfo");
$mw->addChild($siteinfo);
$siteinfo->appendTextChild("sitename", WIKI_NAME);
$siteinfo->appendTextChild("base", WIKI_BASE);
$siteinfo->appendTextChild("generator", "trac2mw $VERSION");
$siteinfo->appendTextChild("case", "first-letter");
my $namespaces = XML::LibXML::Element->new("namespaces");
$siteinfo->addChild($namespaces);
foreach my $ns ([-2, "Media"], [-1, "Special"],
[0, ""], [1, "Talk"],
[2, "User"],
[3, "User talk"],
[4, WIKI_NAME],
[5, WIKI_NAME . " talk"],
[6, "Image"],
[7, "Image talk"],
[8, "MediaWiki"],
[9, "MediaWiki talk"],
[10, "Template"],
[11, "Template talk"],
[12, "Help"],
[13, "Help talk"],
[14, "Category"],
[15, "Category talk"],) {
my $nsElement = XML::LibXML::Element->new("namespace");
$namespaces->addChild($nsElement);
$nsElement->setAttribute("key", $ns->[0]);
$nsElement->appendText($ns->[1]);
}
my $pageID = 1000;
while(my($page, $revisions) = each(%pages)) {
my $pageNode = XML::LibXML::Element->new("page");
$mw->addChild($pageNode);
$pageNode->appendTextChild("title", $page);
$pageNode->appendTextChild("id", $pageID);
my $revID = 1;
foreach my $revision(@$revisions) {
my($text, $author, $time) = ($revision->{text},
$revision->{author},
$revision->{time});
$text = fixWikiText($text);
my $revision = XML::LibXML::Element->new("revision");
$pageNode->addChild($revision);
$revision->appendTextChild("id", $pageID + $revID);
$revision->appendTextChild("timestamp",
strftime("%Y-%m-%dT%H:%M:%SZ",
gmtime($time)));
my $contributor = XML::LibXML::Element->new("contributor");
$revision->addChild($contributor);
my($user_id) = $authors{$author};
if($user_id) {
$contributor->appendTextChild("username", $user_id->[0]);
$contributor->appendTextChild("id", $user_id->[1]);
}
my $textNode = XML::LibXML::Element->new("text");
$revision->addChild($textNode);
$textNode->setAttribute("xml:space", "preserve");
$textNode->appendText($text);
} continue {
$revID++;
}
} continue {
$pageID += 1000;
}
$outDoc->toFH(\*STDOUT, 1);
我的 xml 文件以:
<?xml version="1.0" encoding="utf-8"?>
<!--
- phpMyAdmin XML Dump
- version 3.3.7deb7
- http://www.phpmyadmin.net
-
- Host: localhost
- Generation Time: Jul 23, 2012 at 04:21 PM
- Server version: 5.1.61
- PHP Version: 5.3.3-7+squeeze13
-->
<pma_xml_export version="1.0">
<!--
- Database: 'trac'
-->
<database name="trac">
<!-- Table wiki -->
<table name="wiki">
<column name="name">TracAdmin</column>
<column name="version">1</column>
<column name="time">1205745178</column>
<column name="author">trac</column>
<column name="ipnr">127.0.0.1</column>
<column name="text">= TracAdmin =
[[TracGuideToc]]
...
当我开始转换时,我收到以下错误:
# perl trac2mw.pl tracxmldump.xml
bad ns attribute! at /usr/lib/perl5/XML/LibXML.pm line 1574.
这与脚本或 libxml 有关吗?我该如何解决?