0

我有一个看起来像这样的 csv 文件:

col1|col2

1|a
2|g
3|f

1|m
3|k
2|n

2|a
1|d
4|r
3|s

其中 | 用于分隔各列,我希望将其转换为结构统一的表格,例如:

------------------------
fields  >    1  2  3  4
record1      a  g  f
record2      m  n  k
record3      d  a  s  r
------------------------

有没有办法做到这一点?使用 mysql 或编辑 csv 文件会更好吗?

4

3 回答 3

2

我写了下面这条命令,适用于你的例子(需要 gawk):

awk -F'|' -v RS="" '{for(i=1;i<=NF;i+=2)a[$i]=$(i+1);asorti(a,d);
for(i=1;i<=length(a);i++)printf "%s", a[d[i]]((i==length(a))?"":" ");delete a;delete d;print ""}' file

例子:

kent$  cat file

1|a
2|g
3|f

1|m
3|k
2|n

2|a
1|d
4|r
3|s
kent$  awk -F'|' -v RS="" '{for(i=1;i<=NF;i+=2)a[$i]=$(i+1);asorti(a,d);
for(i=1;i<=length(a);i++)printf "%s", a[d[i]]((i==length(a))?"":" ");delete a;delete d;print ""}' file
a g f
m n k
d a s r
于 2013-04-03T13:30:57.860 回答
1

这是一个 awk 解决方案:

# script.awk -- turn blank-line-separated blocks of "number|value" lines
# into one tab-separated row per block, one column per field number.
#
# Usage: awk -f script.awk file file
# The same file is named twice: the first read finds the highest field
# number, the second read prints the table.
BEGIN {
    RS = ""      # paragraph mode: blank lines separate records
    FS = "\n"    # each line of a block is one field
}

# First read (FNR==NR holds only for the first file argument):
# remember the largest field number. FNR>1 skips the first record.
FNR == NR && FNR > 1 {
    for (i = 1; i <= NF; i++) {
        split($i, d, "|")
        key = d[1] + 0          # force a number so max is compared numerically
        if (key > max)
            max = key
    }
    next
}

# Second read: print the header line once, before the first data record.
FNR > 1 && !header {
    printf "%s\t", "fields  >"
    for (i = 1; i <= max; i++)
        printf "%s\t", i
    print ""
    header = 1
}

# Second read: print one row per data record.
FNR > 1 {
    printf "record%s\t\t", FNR - 1

    # Collect the values of this record, keyed by numeric field number
    # (so "01" and "1" both land in column 1).
    for (i = 1; i <= NF; i++) {
        split($i, d, "|")
        val[d[1] + 0] = d[2]
    }

    # Test membership rather than truthiness: a real value of 0 or an
    # empty string must be printed as-is, not replaced by NULL.
    for (i = 1; i <= max; i++)
        printf "%s\t", (i in val) ? val[i] : "NULL"
    print ""

    delete val
}

将其另存为 script.awk 并运行(注意它采用两遍扫描的方法,因此需要在命令行中把文件名写两次):

$ awk -f script.awk file file
fields  >       1       2       3       4
record1         a       g       f       NULL
record2         m       n       k       NULL
record3         d       a       s       r

将 5|b 这一行添加到 file 的第一条记录中,输出如下:

$ awk -f script.awk file file
fields  >       1       2       3       4       5
record1         a       g       f       NULL    b
record2         m       n       k       NULL    NULL
record3         d       a       s       r       NULL
于 2013-04-03T14:29:17.353 回答
1
$ cat file
col1|col2

1|a
2|g
3|f
5|b

1|m
3|k
2|n

2|a
1|d
4|r
3|s
$
$ awk -f tst.awk file
fields >    1    2    3    4    5
record1     a    g    f NULL    b
record2     m    n    k NULL NULL
record3     d    a    s    r NULL
$
$ cat tst.awk
# tst.awk -- turn blank-line-separated blocks of "number|value" lines into
# a table with one row per block and one column per field number.
#
# Usage: awk -f tst.awk file
BEGIN {
   RS = ""     # paragraph mode: blank lines separate records
   FS = "\n"   # each line of a block is one field
}

# NR>1 skips the first record (the "col1|col2" block in the sample input).
NR > 1 {
   ++numRecs

   for (i = 1; i <= NF; i++) {
      split($i, fldNr2val, "|")
      # Force the field number to a number: the END block looks cells up
      # with numeric indices, so "01" must be stored as 1, and numFlds
      # must never become a string (the loop bounds below rely on a
      # numeric comparison).
      fldNr = fldNr2val[1] + 0
      val   = fldNr2val[2]

      recNrFldNr2val[numRecs, fldNr] = val

      # Track the highest field number seen in any record.
      numFlds = (fldNr > numFlds ? fldNr : numFlds)
   }
}

END {
   # Header line: one right-aligned column per field number.
   printf "fields >"
   for (fldNr = 1; fldNr <= numFlds; fldNr++) {
      printf " %4s", fldNr
   }
   print ""

   # One line per record; cells that were never set are shown as NULL.
   for (recNr = 1; recNr <= numRecs; recNr++) {
      printf "record%d ", recNr
      for (fldNr = 1; fldNr <= numFlds; fldNr++) {
         printf " %4s", ((recNr, fldNr) in recNrFldNr2val ? recNrFldNr2val[recNr, fldNr] : "NULL")
      }
      print ""
   }
}
于 2013-04-03T15:18:19.297 回答