0

这可能是一件非常困难的事情,或者不可能......

我需要查看文件中的每个段落,检查每个段落是否包含某个短语。如果它不包含该短语,我需要添加一行(例如段落中的第 4 行)。

这是一个例子:

[Paragraph Name]
Entry1 = foo
Entry2 = bar
Entry3 = foo
Entry4 = bar
Entry6 = foo
Entry7 = bar
Entry10 = foo

[Another Paragraph Name]
Entry1 = foo
Entry2 = bar
Entry4 = bar
Entry8 = foo

应该变成:

[Paragraph Name]
Entry1 = foo
Entry2 = bar
Entry3 = foo
Entry4 = bar

Entry6 = foo
Entry7 = bar


Entry10 = foo

[Another Paragraph Name]
Entry1 = foo
Entry2 = bar

Entry4 = bar



Entry8 = foo

每行的顺序永远不会改变(条目 1 总是在条目 2 之前,假设它们都存在)。每段有 14 行,每一行都需要检查(除了总是存在的标题 - 所以实际上只有 13 行会丢失)

感谢阅读,如果您知道这是不可能的,请说:)

4

3 回答 3

2

这当然不是很困难,更不用说不可能了:

# Pad every [section] with blank lines wherever an EntryN line is missing,
# so that EntryN always lands on the Nth line after its section header.
# Usage: awk -f script.awk file

# Section header such as "[Paragraph Name]": restart the numbering and echo it.
/^\[.*\]$/ {
    last = 0
    print
    next
}

# Every other line. The entry number is taken from the digits of the key in
# the first field; a line without digits (for example the blank separator
# between sections) yields 0, is printed unchanged and resets the numbering.
{
    entry = $1
    sub(/=.*$/, "", entry)      # drop a value glued to the key ("Entry1=v2")
    gsub(/[^0-9]/, "", entry)   # keep only the digits of the key
    # One blank line per entry number skipped since the previous entry.
    # "last" is 0 right after a header, so entries missing from the top of a
    # section (a section that starts at Entry3, say) are padded as well.
    for (i = last + 1; i < entry + 0; i++)
        print ""
    last = entry + 0
    print
}

将脚本保存到文件中,说script.awk并运行如下:

$ awk -f script.awk file
[Paragraph Name]
Entry1 = foo
Entry2 = bar
Entry3 = foo
Entry4 = bar

Entry6 = foo
Entry7 = bar


Entry10 = foo

[Another Paragraph Name]
Entry1 = foo
Entry2 = bar

Entry4 = bar



Entry8 = foo
于 2013-05-30T15:49:59.730 回答
0

我只是提供另一种解决此问题的方法。您将需要两个文件,一个是包含段落的主文件。第二个文件将按应该遇到的顺序保存关键字。

# Pad each paragraph of "para" with a blank line for every keyword it lacks.
# "keywords" lists, one per line and in the required order, the first field
# of every entry a paragraph may contain.
awk '
FILENAME == ARGV[1] {   # Keywords file: store the keywords in order.
    a[n++] = $1         # (Testing FILENAME keeps an empty keywords file from
    next                #  making the paragraph file look like the first file.)
}
/Paragraph/ {           # This can be a regex that denotes start of a new paragraph.
    j = 0               # Start again from the first keyword.
    print
    next
}
NF > 1 {                # Entry line: one blank line per keyword skipped.
    # The first field is compared as a whole string rather than
    # regex-matching the line, so keyword "Entry1" is not taken for a match
    # on "Entry10 = foo". Stopping at n ends the search when the key is not
    # listed at all; the line is then printed as it is.
    while (j < n && a[j++] != $1)
        print ""
}
1                       # Print the current line (headers were handled above).
' keywords para

测试:

$ awk '
> FNR==NR { 
>     a[i++]=$1
>     next 
> } 
> /Paragraph/ { 
>     j=0; 
>     print
>     next
> } 
> { 
>     while (NF>1) { 
>         if (tmp=match ($0, a[j++])) 
>             { 
>                 print $0
>                 next 
>         } 
>         else 
>             print "" 
>             continue 
>     } 
> }1' keywords para
[Paragraph Name]
Entry1 = foo
Entry2 = bar
Entry3 = foo
Entry4 = bar

Entry6 = foo
Entry7 = bar


Entry10 = foo

[Another Paragraph Name]
Entry1 = foo
Entry2 = bar

Entry4 = bar



Entry8 = foo

使用的文件:

$ cat keywords 
Entry1
Entry2
Entry3
Entry4
Entry5
Entry6
Entry7
Entry8
Entry9
Entry10

$ cat para 
[Paragraph Name]
Entry1 = foo
Entry2 = bar
Entry3 = foo
Entry4 = bar
Entry6 = foo
Entry7 = bar
Entry10 = foo

[Another Paragraph Name]
Entry1 = foo
Entry2 = bar
Entry4 = bar
Entry8 = foo
于 2013-05-30T18:23:14.257 回答
0

这是另一种方法awk

# f is the entry number expected next; n is the number on the current line (0 if it has none).
awk -F"Entry| " 'BEGIN {f=1} /^\[|^$/ {f=1;print;next} {n=$2+0; while (f<n) {print "";f++}; print; if (f==n) f++}' file
[Paragraph Name]
Entry1 = foo
Entry2 = bar
Entry3 = foo
Entry4 = bar

Entry6 = foo
Entry7 = bar


Entry10 = foo

[Another Paragraph Name]
Entry1 = foo
Entry2 = bar

Entry4 = bar



Entry8 = foo

它是如何工作的:

awk -F"Entry| " '               # Sets the Field Separator to "Entry" or " " (Makes it easy to get the number)
    BEGIN { f = 1 }             # Counter f holds the entry number expected next
    /^\[|^$/ {                  # Run this only if line starts with "[" or is a blank line (^$)
        f = 1                   # Set counter to 1
        print                   # Print the line
        next                    # Skip to next record
    }
    {                           # This section is run on all other lines
        n = $2 + 0              # Entry number, forced numeric (0 if the line has none),
                                # so the comparisons below are never string comparisons
        while (f < n) {         # For every entry number that was skipped:
            print ""            # print a blank line
            f++                 # increase counter
        }
        print                   # Always print the line itself, even when out of order
        if (f == n)             # The line was the expected entry:
            f++                 # expect the following number next
    }' file
于 2013-11-20T09:41:33.037 回答