使用 GNU awk(需要其 gensub() 函数):
$ cat tst.awk
# Print every record with the leading "label:" prefix removed from each line,
# inserting a placeholder e-mail line into records that have fewer than
# three lines. Requires GNU awk for gensub().
BEGIN {
    RS = ""            # paragraph mode: records are separated by blank lines
    ORS = "\n\n"       # emit a blank line after every output record
    FS = OFS = "\n"    # one field per line
}

# A record with fewer than 3 lines is treated as lacking its e-mail line:
# move line 2 to position 3 and put the placeholder into position 2.
# The placeholder keeps an "Email: " label so the rule below strips it
# exactly like a real line.
NF < 3 {
    $3 = $2
    $2 = "Email: MISSING_EMAIL_ADDR"
}

# Strip the "label:" and the blanks after it from the start of every line.
# The label must not contain a newline and only blanks (never newlines) are
# consumed after the colon, so a line without a colon, or a line with an
# empty value, cannot make the match swallow part of a neighbouring line.
{
    print gensub(/(^|\n)[^:\n]+:[[:blank:]]*/, "\\1", "g")
}
$ gawk -f tst.awk file
Norman Normalrecord
norman@ooga.com
123 Main street
Missy Missington
MISSING_EMAIL_ADDR
789 Back street
Reggie Regularrecord
reggie@booga.com
456 Middle street
在任何 awk 中,您都可以先使用 sub(/^.../),再使用 gsub(/\n.../),来代替 gensub(/(^|\n).../) 完成相同的操作。
如果有帮助的话,下面这种方法可以识别任何缺失的字段,并按照字段在输入中出现的顺序为其输出“缺失”标记,而无需预先显式列出任何字段名(假设每个字段至少在一条记录中出现过):
$ cat tst.awk
# Print each record's values one per line, substituting "MISSING_<NAME>" for
# any field a record lacks. Field names are learned from the input itself, so
# none has to be listed up front; a name must appear in at least one record
# to be reported.
BEGIN {
    RS = ""            # paragraph mode: records are separated by blank lines
    FS = OFS = "\n"    # one "name: value" field per line
}

# Main rule: remember every value by (record number, field name) and build
# the ordered list of distinct field names.
{
    for (fldNr = 1; fldNr <= NF; fldNr++) {
        # Split on the FIRST colon only, so a value that itself contains a
        # colon (a time, a URL, ...) is kept intact instead of truncated.
        if (match($fldNr, /:[[:space:]]*/)) {
            name = substr($fldNr, 1, RSTART - 1)
            val  = substr($fldNr, RSTART + RLENGTH)
        }
        else {
            # No colon on this line: the whole line is the name and the
            # value is empty.
            name = $fldNr
            val  = ""
        }
        rec[NR,name] = val

        if (!seen[name]++) {
            # First sighting of this name: insert it into names[] at the
            # position it occupies in the current record, shifting every
            # later name up by one. When fldNr is beyond the end of the
            # list the loop body never runs and the name is appended.
            for (nameNr = ++numNames; nameNr > fldNr; nameNr--) {
                names[nameNr] = names[nameNr-1]
            }
            names[nameNr] = name
        }
    }
}

# After all input is read, print every record with one line per known field
# name, followed by a blank line.
END {
    for (recNr = 1; recNr <= NR; recNr++) {
        for (nameNr = 1; nameNr <= numNames; nameNr++) {
            name = names[nameNr]
            if ((recNr,name) in rec) {
                print rec[recNr,name]
            }
            else {
                print "MISSING_" toupper(name)
            }
        }
        print ""
    }
}
$
$ cat file
Name: Norman Normalrecord
Email: norman@ooga.com
Addr: 123 Main street
Name: Missy Missington
Addr: 789 Back street
Name: Reggie Regularrecord
Email: reggie@booga.com
Addr: 456 Middle street
Whatever: Some useful info
$
$ awk -f tst.awk file
Norman Normalrecord
norman@ooga.com
123 Main street
MISSING_WHATEVER
Missy Missington
MISSING_EMAIL
789 Back street
MISSING_WHATEVER
Reggie Regularrecord
reggie@booga.com
456 Middle street
Some useful info