4

我有一个包含 10 列的文本文件,例如 f.txt,如下所示:

aab abb  263-455
aab abb  263-455
aab abb  263-455
bbb abb  26-455
bbb abb  26-455
bbb aka  264-266
bga bga  230-232
bga bga  230-232

我想统计第一列和第二列中的每个字符串各自对应多少个不同的第三列取值(两列分别统计,再按字符串相加)。

输出:

aab - 1
abb - 2
bbb - 2
aka - 1
bga - 2

Total no - 8
4

5 回答 5

3

这可以解决问题:

$ awk '
    # Handle each distinct input line once; credit the strings in columns 1 and 2.
    !(seen[$0]++) { tally[$1]++; tally[$2]++ }
    END {
        for (word in tally) {
            print word " - " tally[word]
            total += tally[word]
        }
        print "\nTotal No -", total
    }' file
aka - 1
bga - 2
aab - 1
abb - 2
bbb - 2

Total No - 8

以更具可读性的脚本形式:

# Every distinct input line adds one to the tally of its column-1 string
# and one to the tally of its column-2 string. At end of input the
# per-string tallies are printed, followed by their sum.

# The post-increment is zero only the first time a given line is seen,
# so repeated lines fall through without being counted again.
!(seen[$0]++) {
    tally[$1]++
    tally[$2]++
}

END {
    for (word in tally) {
        total += tally[word]
        print word " - " tally[word]
    }
    print "\nTotal No -", total
}

要以这种形式运行它,请将其保存到文件 script.awk 中,然后执行:

$ awk -f script.awk file
aka - 1
bga - 2
aab - 1
abb - 2
bbb - 2

Total No - 8
于 2013-05-24T07:54:55.927 回答
3
awk '
  # seen is keyed by "column:string:range", so every (column, string, range)
  # combination is counted exactly once.
  {
    for (col = 1; col <= 2; col++) {
      key = col ":" $col ":" $3
      if (seen[key]++ == 0) {
        uniq[$col]++
        grand++
      }
    }
  }
  END {
    for (name in uniq)
      print name, uniq[name]
    print "Total No -", grand
  }' input

输出

bga 2
aab 1
bbb 2
aka 1
abb 2
Total No - 8
于 2013-05-24T08:03:40.377 回答
2
 awk '
   # One "seen" table per column, each keyed by (string, range).
   !(first_col[$1, $3]++)  { hits[$1]++ }
   !(second_col[$2, $3]++) { hits[$2]++ }
   END {
     for (word in hits) {
       print word, hits[word]
       total += hits[word]
     }
     print "Total -", total
   }' file
于 2013-05-24T08:45:02.430 回答
1

这是一个有点长的命令,但很容易理解:

# Stage 1 (gawk): print one key per distinct (range, string, column) triple.
#   Multi-index subscripts are joined with SUBSEP; the cut stage assumes
#   SUBSEP is \x1c (the awk default), so do not override it.
# Stage 2 (cut/sort/uniq): keep only the string part of each key and count
#   how many times each string occurs.
# Stage 3 (awk): turn each "count string" line into "string - count" and
#   finish with the sum of all counts.
gawk '
  {
    triple[$3, $1, 1]    # a bare reference is enough to create the element
    triple[$3, $2, 2]
  }
  END {
    for (key in triple)
      print key
  }' input |
  cut -d $'\x1c' -f 2 |
  sort |
  uniq -c |
  awk -v OFS=' - ' '
    { print $2, $1; grand += $1 }
    END { print "\nTotal No", grand }'

aab - 1
abb - 2
aka - 1
bbb - 2
bga - 2

Total No - 8
于 2013-05-24T08:08:27.163 回答
0
# For each string found in column 1 or column 2, count the distinct
# column-3 values it occurs with. The two columns are tracked separately,
# so a string that fills both columns of a row (e.g. "bga bga") is
# credited once per column.
# Only classic one-dimensional awk arrays with comma subscripts are used,
# so gawk arrays of arrays are not required.

# Rows without a third column carry no range to count.
NF < 3 { next }

{
    for (col = 1; col <= 2; col++) {
        # The column number is part of the key; without it the second
        # "bga" of "bga bga  230-232" is mistaken for a repeat of the first.
        if (!((col, $col, $3) in seen)) {
            seen[col, $col, $3] = 1
            total[$col]++
        }
    }
}
END {
    for (item in total) {
        print item, total[item]
        totalCount += total[item]
    }
    # "+ 0" makes the total print as 0 rather than an empty string when no row qualified.
    print "\nTotal no - ", totalCount + 0
}

输出:

aka 1
bga 2
aab 1
abb 2
bbb 2

Total no -  8
于 2013-05-24T08:09:39.673 回答