这是我用 awk 写的方案——我认为对这个程序来说,awk 比 sed 更合适。它不改动 LaTeX 命令(即以“\”开头的单词),并且会保留被替换单词首字母的大写。LaTeX 命令的参数(以及普通文本)会按字典文件进行替换。当给出程序的第三个参数 [rev] 时,它会用同一个字典文件进行反向替换。任何非字母字符都可以用作单词分隔符(这在 LaTeX 源文件中是必需的)。程序将输出写到屏幕 (stdout),因此您需要把输出重定向到文件 (>output_f)。(我假定你的 LaTeX 源文件的 inputencoding 是每个字符 1 字节。)
> cat dic.sh
#!/bin/bash
#
# dic.sh - translate the words of a LaTeX source through a dictionary file.
#
# Usage: dic.sh dictionary_file latex_file [rev]
#
#   dictionary_file  whitespace-separated "word replacement" pairs, one pair
#                    per line; blank lines are skipped. Lookups are done with
#                    the lowercased word, so keys are expected in lowercase.
#   latex_file       text to translate; the result is written to stdout.
#   rev              when exactly three arguments are given, the dictionary
#                    columns are swapped (reverse translation). The value of
#                    the third argument itself is not inspected.
#
# Words are runs of characters matching [A-Za-z]; every other character is a
# separator and is copied through unchanged. A backslash starts a LaTeX
# command whose name is copied verbatim up to (and including) the next
# character that is neither alphanumeric nor a backslash, or up to the end of
# the line.

if (( $# < 2 )); then
  # Diagnostics go to stderr so they never land in a redirected output file.
  echo "Usage $0 dictionary_file latex_file [rev]" >&2
  exit 1
fi

# forward=1: column 1 -> column 2; forward=0: column 2 -> column 1.
if (( $# == 3 )); then
  forward=0
else
  forward=1
fi

awk -v fwd="$forward" '
# Load the dictionary before any input record is read. Doing it here, rather
# than detecting the switch from the first to the second input file, keeps an
# empty dictionary from causing the LaTeX file to be loaded as the dictionary.
BEGIN {
  in_cmd = 0
  dict = ARGV[1]
  while ((rc = (getline line < dict)) > 0) {
    if (split(line, f) == 0) continue        # skip blank lines
    if (fwd) a[f[1]] = f[2]; else a[f[2]] = f[1]
  }
  if (rc < 0) {
    printf("%s: cannot read dictionary file\n", dict) > "/dev/stderr"
    exit 2
  }
  close(dict)
  ARGV[1] = ""    # an empty operand is skipped; only the LaTeX file remains
}

# Walk every line of the LaTeX file one character at a time.
{
  n = length($0)
  for (i = 1; i <= n; i++) {
    c = substr($0, i, 1)
    if (in_cmd) {
      # Inside a command name: copy verbatim until a terminating character.
      # A regex constant is used so the backslash is escaped exactly once,
      # independent of how string escapes are processed.
      printf("%s", c)
      if (c ~ /[^A-Za-z0-9\\]/) in_cmd = 0
    } else if (c ~ /[A-Za-z]/) {
      w = w c                                # keep collecting the word
    } else {
      pr()                                   # flush the word before c
      printf("%s", c)
      if (c == "\\") in_cmd = 1
    }
  }
  pr()                                       # flush a word ending the line
  printf("\n")
  in_cmd = 0      # the end of the line also terminates a command name
}

# Print the pending word w (if any) and clear it. A word whose lowercase form
# is a dictionary key is replaced by its entry; unless the word was entirely
# lowercase, the first character of the replacement is upper-cased.
function pr(    s, r) {
  if (length(w) == 0) return
  s = tolower(w)
  if (s in a) {
    r = a[s]
    if (s != w) r = toupper(substr(r, 1, 1)) substr(r, 2)
    printf("%s", r)
  } else {
    printf("%s", w)
  }
  w = ""
}
' "$1" "$2"
字典文件:
> cat dictionary
apple lemon
raspberry cherry
pear banana
输入 LaTeX 源:
> cat src.txt
Apple123pear,apple "pear".
\Apple123pear{raspberry}{pear}[apple].
Raspberry12Apple,pear.
执行结果:
> ./dic.sh
Usage ./dic.sh dictionary_file latex_file [rev]
> ./dic.sh dictionary src.txt >out1.txt; cat out1.txt
Lemon123banana,lemon "banana".
\Apple123pear{cherry}{banana}[lemon].
Cherry12Lemon,banana.
> ./dic.sh dictionary out1.txt >out2.txt rev; cat out2.txt
Apple123pear,apple "pear".
\Apple123pear{raspberry}{pear}[apple].
Raspberry12Apple,pear.
> diff src.txt out2.txt # they are identical