unix - 使用 AWK 转置

Question

可能重复：
使用 AWK 或 Perl 转置

嗨，我想使用 AWK 获取以下格式的输出文件。我的输入文件是一个空格分隔的文本文件。对此的任何帮助将不胜感激。谢谢

输入文件

id  quantity colour shape   size colour shape     size  colour  shape   size
1   10       blue   square  10   red    triangle   12   pink    circle  20
2   12       yellow pentagon 3   orange rectangle   4   purple   oval   6

期望的输出

id  colour  shape    size
1   blue    square   10
1   red     triangle 12
1   pink    circle   20
2   yellow  pentagon  3
2   orange  rectangle 4
2   purple  oval      6

score 2 · Accepted Answer

这个脚本是通用的，您可以按列名选择要输出的列。我已在输入数据中加入额外的列进行了测试：无论这些额外的列位于什么位置，它们都不会出现在输出中。

#!/usr/bin/awk -f
BEGIN {
    # Grouped columns to emit, by header name, with one printf format per
    # name in the same order. Both lists are split on whitespace, so a
    # format in col_fmt cannot contain a literal space: write the letter B
    # where a blank is wanted (e.g. %6dB); the printing rules turn each B
    # into a space. Normally a field width in the format is all you need.
    col_list = "colour shape size"
    col_fmt = "%-12s %-14s %5d"
    ncols = split(col_list, cols)
    split(col_fmt, fmts)

    # col_names is used as a set of wanted names; forms maps name -> format.
    i = 0
    while (++i <= ncols) {
        col_names[cols[i]]
        forms[cols[i]] = fmts[i]
    }

    # Columns whose value is repeated on every output line. They are
    # printed together at the start of the line. Their formats are assigned
    # one by one, so they can hold real spaces and need no B placeholder.
    repeat_fields["id"]
    repeat_fmt["id"] = "%4d "
    # To repeat another column as well, add a pair like this:
    #repeat_fields["another"]
    #repeat_fmt["another"] = "%8s"
}

# Header line (first line of every input file): learn which field numbers
# hold the wanted columns, then print the output header.
#   repeat[1..nrepeats], col_nums[1..n]  field numbers in input order
#   repeat_look, col_look                the same field numbers as lookup sets
#   rformats, formats                    printf format for each field number
FNR == 1 {
    # This rule fires once per input file, so discard the previous file's
    # layout instead of appending to it (split("", arr) empties an array).
    nrepeats = 0
    n = 0
    split("", repeat)
    split("", repeat_look)
    split("", rformats)
    split("", col_nums)
    split("", col_look)
    split("", formats)

    for (i = 1; i <= NF; i++) {
        if ($i in repeat_fields) {
            repeat[++nrepeats] = i
            repeat_look[i] = i
            rformats[i] = repeat_fmt[$i]
        }
        if ($i in col_names) {
            col_nums[++n] = i
            col_look[i] = i
            formats[i] = forms[$i]
        }
    }

    # Print the header line. The data formats are reused so the header lines
    # up with the data: the first "d" becomes "s" because header cells are
    # text, and every B placeholder becomes a blank.
    for (i = 1; i <= nrepeats; i++) {
        f = rformats[repeat[i]]
        sub("d", "s", f)
        gsub("B", " ", f)
        printf f, $repeat[i]
    }
    # Only the first group of columns is named in the header. Stop at n so
    # that a header with fewer wanted columns than ncols never indexes an
    # unset position.
    for (i = 1; i <= ncols && i <= n; i++) {
        f = formats[col_nums[i]]
        sub("d", "s", f)
        gsub("B", " ", f)
        printf f, $col_nums[i]
    }
    printf "\n"
    next
}

# Data lines: print one output line per complete group of selected columns,
# each line prefixed with the repeated columns (e.g. the id).
# Reads repeat_look/rformats and col_look/formats (field number -> format)
# and ncols (number of columns per group), all set by the earlier rules.
{
    # Pass 1: build the repeated prefix from the whole record first, so a
    # repeated column is included even when it appears after the first
    # group of selected columns.
    repeat_out = ""
    for (i = 1; i <= NF; i++) {
        if (i in repeat_look) {
            f = rformats[i]
            gsub("B", " ", f)
            repeat_out = repeat_out sprintf(f, $i)
        }
    }

    # Pass 2: collect the selected columns and flush after every ncols of
    # them. Starting from a clean state means an incomplete trailing group
    # on the previous record is dropped rather than mixed into this one.
    out = ""
    coln = 0
    for (i = 1; i <= NF; i++) {
        if (i in col_look) {
            f = formats[i]
            gsub("B", " ", f)
            out = out sprintf(f, $i)
            # Checked only after adding a column, so nothing is printed
            # when no columns are selected (ncols == 0).
            if (++coln == ncols) {
                print repeat_out out
                out = ""
                coln = 0
            }
        }
    }
}

使用此修改后的输入数据：

no id  colour base   shape    size colour shape     size  colour  shape   size material
14 1   blue   twenty square   10   red    triangle   12   pink    circle  20   wool
23 2   yellow ninety pentagon 3    orange rectangle   4   purple  oval    6    cotton

输出是：

  id colour      shape          size
   1 blue        square           10
   1 red         triangle         12
   1 pink        circle           20
   2 yellow      pentagon          3
   2 orange      rectangle         4
   2 purple      oval              6

score 1 · Accepted Answer

见下文：

kent$  cat a
id  colour  shape   size colour shape     size  colour  shape   size
1   blue    square  10   red    triangle   12   pink    circle  20
2   yellow  pentagon 3   orange rectangle   4   purple   oval   6

kent$  awk 'NR==1{print "id colour shape size";next;}
# Data rows: $1 is the id, followed by groups of three fields. Values are
# always passed as printf arguments, never as the format string, so a
# percent sign in the data cannot be misread as a conversion.
{id=$1; printf "%s", id;
        for(i=2;i<=NF;i++){
                printf "%s%s", FS, $i;
                # after every third field end the line; unless it was the
                # last field, start the next line with the id again
                if((i-1)%3==0) printf "%s", ((NF!=i) ? "\n" id : "\n"); }}' a
id colour shape size
1 blue square 10
1 red triangle 12
1 pink circle 20
2 yellow pentagon 3
2 orange rectangle 4
2 purple oval 6

如果您的系统上有 column 命令，可以将输出通过管道传给 column -t，使各列对齐、看起来更美观：

kent$  awk 'NR==1{print "id colour shape size";next;}
# Data rows: $1 is the id, followed by groups of three fields. Values are
# always passed as printf arguments, never as the format string, so a
# percent sign in the data cannot be misread as a conversion.
{id=$1; printf "%s", id;
        for(i=2;i<=NF;i++){
                printf "%s%s", FS, $i;
                # after every third field end the line; unless it was the
                # last field, start the next line with the id again
                if((i-1)%3==0) printf "%s", ((NF!=i) ? "\n" id : "\n"); }}' a|column -t
id  colour  shape      size
1   blue    square     10
1   red     triangle   12
1   pink    circle     20
2   yellow  pentagon   3
2   orange  rectangle  4
2   purple  oval       6

score 0 · Accepted Answer

一种方法：

script.awk 的内容：

# Header line of each input file: print its first four fields (the id plus
# one colour/shape/size group) separated by tabs, then skip to the next line.
FNR == 1 { 
    # FNR == 1 is true for the first line of every file, so start from an
    # empty string; otherwise a second file's header is appended to the
    # first file's header.
    header = ""
    for ( i = 1; i <= 4; i++ ) { 
        # put a tab in front of every field except the first
        header = (header ? header "\t" : "") $i
    }   
    printf "%s\n", header
    next
}

# Data lines: field 1 is the id; the remaining fields are taken three at a
# time, and each group of three is printed on its own tab-separated line
# after the id. Every line ends with a tab before the newline.
FNR > 1 { 
    id = $1
    i = 2
    while ( i <= NF ) { 
        printf "%d\t%s\t%s\t%s\t\n", id, $i, $(i + 1), $(i + 2)
        i += 3
    }   
}

运行它（感谢 Kent 提供的 column -t 命令）：

awk -f script.awk infile | column -t

具有以下输出：

id  colour  shape      size
1   blue    square     10
1   red     triangle   12
1   pink    circle     20
2   yellow  pentagon   3
2   orange  rectangle  4
2   purple  oval       6

score 0 · Accepted Answer

# Stage 1 (sed): on every line except the first, replace each number and the
# whitespace after it with the number and a newline. Each id then sits alone
# on its line and every colour/shape/size group gets a line of its own.
# Stage 2 (awk): print the first four header fields, remember the id from
# each one-field line, and put it (tab-separated) in front of the group
# lines that follow.
# NOTE(review): \t in the bracket expression and \n in the replacement are
# assumed to mean tab and newline, as in GNU sed -- confirm for other seds.
sed -e '1! {s/\([0-9][0-9]*\)[ \t][ \t]*/\1\n/g;}' test.txt \
  | awk -v OFS="\t" '
      NR == 1          { print $1, $2, $3, $4 }
      NF == 1          { id = $1 }
      NR > 1 && NF > 1 { print id, $0 }'

本想完全用 sed 来完成，但我不擅长使用保持缓冲区（hold buffer），所以没能做到。无论如何，先用 sed 在每个数字后插入一个换行符：

id  colour  shape   size colour shape     size  colour  shape   size
1
blue    square  10
red    triangle   12
pink    circle  20
2
yellow  pentagon 3
orange rectangle   4
purple   oval   6

然后使用 awk 把 id 填充到其后各行的开头：

id  colour  shape   size
1   blue    square  10
1   red    triangle   12
1   pink    circle  20
2   yellow  pentagon 3
2   orange rectangle   4
2   purple   oval   6

不过，这应该完全可以只用 sed 实现。

unix - 使用 AWK 转置

4 回答 4

Related

Reference