如果把文件(例如 document.docx)重命名为 document.docx.unzipped.zip,就可以解压该存档,例如解压到文件夹 'document.docx.unzipped' 中。不幸的是,解压出来的 xml 文件可读性很差,因为所有 xml 内容都挤在同一行里。
我想把以下过程自动化:解压 docx 存档,并把解压文件夹(document.docx.unzipped)中的所有 xml 文件转换成可读/美化的版本(类似 Notepad++ --> Extensions --> XML Tools --> Pretty print (XML only – with line breaks) 的效果,即只添加换行符)。
有没有什么快速的实现方法?
EDIT1:下面的脚本是根据 https://stackoverflow.com/users/1761490/pawel-jasinski 的想法修改而来的:
#!/bin/sh
# This script unpacks a docx file and reformats the XML files it contains.
#
# You need an XSLT processor (Transform) in your PATH, e.g.
# /c/Program Files/Saxonica/SaxonHE9.4N/bin/Transform
#
# Make sure remove-rsid.xslt and copy.xslt are copied next to this script.
# ---------------------------------------------------------------------------
# Argument handling and path setup.
#
# Usage: <script> [-r] <document>
#   -r          select remove-rsid.xslt instead of copy.xslt as the transform
#   <document>  word document; "foo" and "foo.xml" are both mapped to
#               "foo.docx", any other extension is used as given
#
# Variables set for the rest of the script:
#   name       path of the archive to unpack
#   suffix     extension of $name (without the dot)
#   corename   basename of $name without its extension
#   DIR        directory containing this script
#   DOSDIR     $DIR converted by "cygpath -m" when cygpath is available
#   FLAT       temporary flat directory (per process, below $PWD)
#   FLATOUT    temporary output directory next to $FLAT
#   transform  xslt file selected by -r
# ---------------------------------------------------------------------------

# Start from a known value so an exported remove_rsid cannot leak in.
remove_rsid=0
if [ "$1" = "-r" ]; then
  remove_rsid=1
  shift
fi
if [ -z "$1" ]; then
  echo "expected name of the word document to be exploded" >&2
  exit 1
fi

# Look at the last path component only, so that dots in directory names
# ("./doc", "dir.v2/doc") are not mistaken for a file extension.
arg_base=${1##*/}
case "$arg_base" in
  *.xml)
    suffix=docx
    # POSIX suffix removal instead of the bash-only ${1/%.xml/.docx}
    name=${1%.xml}.docx
    ;;
  *.*)
    suffix=${arg_base##*.}
    name=$1
    ;;
  *)
    suffix=docx
    name=$1.docx
    ;;
esac

corename=$(basename "$name" ".$suffix")
if [ -z "$corename" ]; then
  echo "can not work with empty name" >&2
  exit 1
fi

DIR="$(cd "$(dirname "$0")" && pwd)"
# cygpath only exists on Cygwin/MSYS; elsewhere use the directory unchanged.
if command -v cygpath >/dev/null 2>&1; then
  DOSDIR=$(cygpath -m "$DIR")
else
  DOSDIR=$DIR
fi

# $$ keeps the temporary directories of concurrent runs apart.
FLAT=$PWD/$corename.tmp/flat.$$
FLATOUT=$PWD/$corename.tmp/flat.$$.out

if [ "$remove_rsid" = "1" ]; then
  transform=$DOSDIR/remove-rsid.xslt
else
  transform=$DOSDIR/copy.xslt
fi
# $1 - file name
#
# Pretty-prints the XML file $1 in place with "xmllint --format".
# The formatted output is written to "$1.new" first and only replaces $1
# when xmllint succeeded, so a failed run leaves the original untouched.
# Outputs: "reformat <file>" on stdout.
# Returns: 0 on success, non-zero when xmllint or the final mv failed.
_reformat_xml() {
  printf 'reformat %s\n' "$1"
  #read pause
  if ! xmllint --format "$1" -o "$1.new"; then
    # Drop a possibly half-written result instead of moving it over $1.
    rm -f -- "$1.new"
    return 1
  fi
  mv -- "$1.new" "$1"
}
# Moves every *.xml and *.rels file below the current directory into the
# flat directory "$FLAT" and pretty-prints it there with _reformat_xml.
#
# The flat name is the relative path with '/' replaced by '@'
# (./word/document.xml -> word@document.xml); *.rels files additionally get
# an ".xml" suffix in $FLAT.
#
# Globals:
#   FLAT (read)   existing directory receiving the flattened files
#   xmls (write)  space separated list of flattened *.xml names
#   rels (write)  space separated list of flattened *.rels names
#                 (without the added ".xml")
#
# Files whose path contains whitespace or '@' are left where they are (with
# a warning on stderr): the space separated lists and the '@' <-> '/'
# mapping can not represent them, so moving them would lose them.
# A file whose mv fails is not added to the lists either.
flaten() {
  local f ff
  xmls=""
  rels=""
  pwd
  #read pause

  # xml
  # The file list is read from fd 3 so stdin stays free for the loop body.
  while IFS= read -r f <&3; do
    [ -n "$f" ] || continue
    case "$f" in
      *[[:space:]]* | *@*)
        echo "skipping $f: name can not be flattened" >&2
        continue
        ;;
    esac
    ff=$(printf '%s' "${f#./}" | tr '/' '@')
    echo "mv $f $FLAT/$ff"
    mv -- "$f" "$FLAT/$ff" || continue
    _reformat_xml "$FLAT/$ff"
    xmls="$xmls $ff"
  done 3<<EOF
$(find . -type f -name '*.xml')
EOF

  # for rels, rename into .xml
  while IFS= read -r f <&3; do
    [ -n "$f" ] || continue
    case "$f" in
      *[[:space:]]* | *@*)
        echo "skipping $f: name can not be flattened" >&2
        continue
        ;;
    esac
    ff=$(printf '%s' "${f#./}" | tr '/' '@')
    mv -- "$f" "$FLAT/$ff.xml" || continue
    _reformat_xml "$FLAT/$ff.xml"
    rels="$rels $ff"
    #read pause
  done 3<<EOF
$(find . -type f -name '*.rels')
EOF
}
# Moves the flattened files from "$FLATOUT" back into the directory tree
# below the current directory, reversing the '@' -> '/' renaming
# (word@document.xml -> word/document.xml). Entries of $rels are stored in
# $FLATOUT with an extra ".xml" suffix, which is dropped again here.
#
# Globals:
#   FLATOUT (read)  directory holding the flattened files
#   rels (read)     space separated flattened *.rels names (without ".xml")
#   xmls (read)     space separated flattened *.xml names
# Outputs: "PDW: <dir>" progress lines on stdout.
# Returns: 0 on success; non-zero when $FLATOUT can not be entered, a file
#          could not be moved, or the original directory can not be
#          re-entered. The working directory is restored before returning.
expand_dirs() {
  local target_dir f ff status=0 glob_was_on=0
  target_dir=$(pwd)
  # Without this check the mv calls below would run in the wrong directory.
  cd "$FLATOUT" || return 1
  echo "PDW: $PWD"
  #read pause

  # $rels/$xmls are split on whitespace on purpose, but names such as
  # "[Content_Types].xml" must not be treated as glob patterns.
  case $- in
    *f*) ;;
    *)
      glob_was_on=1
      set -f
      ;;
  esac

  for f in $rels; do
    ff=$(printf '%s' "$f" | tr '@' '/')
    mv -- "$f.xml" "$target_dir/$ff" || status=1
  done
  for f in $xmls; do
    echo "PDW: $PWD"
    #read pause
    ff=$(printf '%s' "$f" | tr '@' '/')
    mv -- "$f" "$target_dir/$ff" || status=1
  done

  if [ "$glob_was_on" -eq 1 ]; then
    set +f
  fi
  cd "$target_dir" || return 1
  return "$status"
}
# ---------------------------------------------------------------------------
# Main flow: unpack "$name" into ./$corename, pretty-print the XML files via
# flaten / expand_dirs and remove the temporary flat directories again.
# An existing ./$corename is kept as ./$corename.bak (replacing an older
# backup of that name).
# ---------------------------------------------------------------------------
echo "corename: $corename"
# NOTE(review): looks like a leftover debugging pause; it blocks until a
# line is read from stdin. Kept to preserve the interactive behaviour.
read -r pause

# Resolve the archive before changing directory, so absolute paths work too
# (the former "../$name" only worked for relative names).
case "$name" in
  /* | [A-Za-z]:[\\/]*) archive=$name ;;
  *) archive=$PWD/$name ;;
esac
# Check first: do not touch an existing backup when there is nothing to unpack.
if [ ! -f "$archive" ]; then
  echo "can not find $archive" >&2
  exit 1
fi

if [ -e "$corename" ]; then
  # echo removing $corename.bak
  rm -rf -- "${corename:?}.bak"
  # echo backing up $corename
  mv -- "$corename" "$corename.bak" || exit 1
fi

# Stop if the output directory is unusable: everything below operates on
# the current directory and would otherwise rewrite files in the wrong place.
mkdir -- "$corename" || exit 1
cd -- "$corename" || exit 1

unzip -q "$archive"
unzip_status=$?
# Info-ZIP unzip reports mere warnings with status 1; larger values are errors.
if [ "$unzip_status" -gt 1 ]; then
  echo "unzip failed for $archive (status $unzip_status)" >&2
  exit 1
fi

rm -rf -- "${FLAT:?}"
mkdir -p -- "$FLAT" || exit 1
flaten

rm -rf -- "${FLATOUT:?}"
mkdir -p -- "$FLATOUT" || exit 1
#exit
# The XSLT step is disabled; the flat files are copied over unchanged instead:
#dosflat=$(cygpath -m $FLAT)
#Transform -xsl:$transform -s:$dosflat -o:$dosflat.out
# "dir/." copies the directory content including dotfiles and does not fail
# on an empty directory the way an unmatched "dir/*" glob does.
cp -R "$FLAT"/. "$FLATOUT"/
expand_dirs
# NOTE(review): second debugging pause before the cleanup, kept as well.
read -r pause #
rm -rf -- "${FLAT:?}" "${FLATOUT:?}"
# Remove the now empty parent ($corename.tmp). rmdir refuses non-empty
# directories, so data of other runs is safe; failure is fine here.
rmdir -- "${FLAT%/*}" 2>/dev/null || :