对于这样的输入(第一列左侧没有前导制表符):
184
2
P 2853263 4998463
SS
AG0001-C
T/T C/C A/A
AG0002-C
T/T C/C A/T
AG0003-C
T/T C/C A/A
AG0004-C
T/T C/C T/A
下面这个脚本应该可以在 mawk 下运行:
#!/usr/bin/awk -f
# Expands each "/"-separated data record into one output line per
# "/"-position, with the columns joined by single tabs.

# Records 1-4 and every odd-numbered record are echoed unchanged
# (in the sample input these are the header lines and the ID lines).
NR <= 4 || NR % 2 {
    print
    next
}

# Every remaining (even-numbered) record is a data line.
{
    # Pass 1: split every field on "/" and remember each piece by
    # (column, position); track the largest piece count seen.
    depth = 0
    for (col = 1; col <= NF; col++) {
        n = split($col, piece, /\//)
        if (depth < n) {
            depth = n
        }
        for (pos = 1; pos <= n; pos++) {
            cell[col, pos] = piece[pos]
        }
    }

    # Pass 2: emit one line per position. A column that has fewer pieces
    # than "depth" contributes an empty cell, so columns stay aligned.
    for (pos = 1; pos <= depth; pos++) {
        line = cell[1, pos]
        for (col = 2; col <= NF; col++) {
            line = line "\t" cell[col, pos]
        }
        print line
    }

    # Empty the lookup table before the next record (portable idiom:
    # splitting an empty string leaves the target array with no elements).
    split("", cell)
}
输出:
184
2
P 2853263 4998463
SS
AG0001-C
T C A
T C A
AG0002-C
T C A
T C T
AG0003-C
T C A
T C A
AG0004-C
T C T
T C A
它甚至应该适用于像这样的不同格式:
184
2
P 2853263 4998463
SS
AG0001-C
A/A/C/X/Y/Z T/T C/C A/A A/A/C/X A/A/B A/A/C/X/Y
AG0002-C
T/T C/C A/T
AG0003-C
T/T C/C A/A
AG0004-C
T/T C/C T/A
输出:
184
2
P 2853263 4998463
SS
AG0001-C
A T C A A A A
A T C A A A A
C C B C
X X X
Y Y
Z
AG0002-C
T C A
T C T
AG0003-C
T C A
T C A
AG0004-C
T C T
T C A
对于左侧带有前导制表符的输入:
184
2
P 2853263 4998463
SS
AG0001-C
T/T C/C A/A
AG0002-C
T/T C/C A/T
AG0003-C
T/T C/C A/A
AG0004-C
T/T C/C T/A
这段代码
#!/usr/bin/awk -f
# Expands each "/"-separated data record into one output line per
# "/"-position; every cell, including the first, is preceded by a tab.

# Records 1-4 and every odd-numbered record are echoed unchanged
# (in the sample input these are the header lines and the ID lines).
NR <= 4 || NR % 2 {
    print
    next
}

# Every remaining (even-numbered) record is a data line.
{
    # Pass 1: split every field on "/" and remember each piece by
    # (column, position); track the largest piece count seen.
    depth = 0
    for (col = 1; col <= NF; col++) {
        n = split($col, piece, /\//)
        if (depth < n) {
            depth = n
        }
        for (pos = 1; pos <= n; pos++) {
            cell[col, pos] = piece[pos]
        }
    }

    # Pass 2: emit one line per position, prefixing each cell with a tab so
    # the output lines start with a tab. Missing pieces become empty cells.
    for (pos = 1; pos <= depth; pos++) {
        line = ""
        for (col = 1; col <= NF; col++) {
            line = line "\t" cell[col, pos]
        }
        print line
    }

    # Empty the lookup table before the next record (portable idiom:
    # splitting an empty string leaves the target array with no elements).
    split("", cell)
}
会给出如下输出:
184
2
P 2853263 4998463
SS
AG0001-C
T C A
T C A
AG0002-C
T C A
T C T
AG0003-C
T C A
T C A
AG0004-C
T C T
T C A