-1

不知道在这里提问是否合适,因为这算不上编程问题,但我不知道还能去哪里问:我想以一致的方式整理以下数据。目前它一团糟,只有前两列(以逗号分隔)是一致的。其余的列可能有 1 到 9 个不等,而且每行通常都不相同。换句话说,我想重新排列这些数据,使同名的项对齐(所有 value 排在同一列,所有 recoil 排在同一列,依此类推)。这样我就可以删掉这些文字、加上标题行,而数据仍然有意义。

bm_wp_upg_o_t1micro, sight, value = 3, zoom = 3, recoil = 1, spread_moving = -1 
bm_wp_upg_o_marksmansight_rear, sight, value = 3, zoom = 1, recoil = 1, spread = 1 
bm_wp_upg_o_marksmansight_front, extra, value = 1 
bm_wp_m4_upper_reciever_edge, upper_reciever, value = 3, recoil = 1 
bm_wp_m4_upper_reciever_round, upper_reciever, value = 1 
bm_wp_m4_uupg_b_long, barrel, value = 4, damage = 1, spread = 1, spread_moving = -2, concealment = -2 

任何建议(即使是在正确的地方实际提出这个问题)都会很棒。上下文只是从我试图组织的游戏文件中提取的原始数据。

4

3 回答 3

1

恐怕正则表达式在这里对您帮助不大,因为您的输入并不规则(用正则匹配是可以做到的,但要把所有内容按某种方式重新排列会非常麻烦)。这用任何编程语言都很容易完成,不过对于这类问题,我总是首选 awk。

假设您的输入位于名为 input.txt 的文件中,请将以下内容保存到名为 parse.awk 的程序文件中:

# parse.awk -- flatten irregular "id, flag, key = value, ..." records into
# fixed-width, comma-separated rows with one column per known name.
#
# Usage : awk -f parse.awk input.txt
# Input : one record per line, fields separated by commas.  Field 1 is the
#         id; every later field is either a bare flag (e.g. "sight") or a
#         "key = value" pair, in any order.
# Output: a header line followed by one aligned row per record.  Bare flags
#         are shown as "x"; keys that a record does not have stay empty.

# Remove leading/trailing blanks (and a stray CR from DOS line endings).
# Uses plain gsub so the script runs on any POSIX awk, not only gawk.
function trim(s) {
    gsub( /^[ \t\r]+|[ \t\r]+$/, "", s );
    return s;
}

BEGIN {
    # A comma with optional surrounding spaces separates the fields.
    FS=" *, *";
    formatStr = "%32s,%8s,%8s,%8s,%10s,%16s,%8s,%18s,%10s,%10s,%16s,%16s\n";
    # Column 6 is "spread"; "spread_moving" appears once, in column 11.
    printf( formatStr, "id", "sight", "value", "zoom", "recoil", "spread", "extra", "upper_receiver", "barrel", "damage", "spread_moving", "concealment" );
}

# Blank lines carry no record; do not print an empty row for them.
trim($0) == "" { next }

{
    split("",a);    # portable way to empty the per-record lookup table
    for( i=2; i<=NF; i++ ) {
        n = split( $(i), kvp, " *= *" );
        key = trim( kvp[1] );
        if( key == "" ) {
            continue;   # empty field, e.g. produced by a trailing comma
        }
        # A field without "=" is a presence flag; otherwise keep its value.
        a[key] = ( n == 1 ) ? "x" : trim( kvp[2] );
    }
    # The sample data spells this flag "upper_reciever"; accept either
    # spelling so the column is filled regardless of which one is used.
    upper = ( "upper_receiver" in a ) ? a["upper_receiver"] : a["upper_reciever"];
    printf( formatStr, trim($1), a["sight"], a["value"], a["zoom"], a["recoil"],
        a["spread"], a["extra"], upper,
        a["barrel"], a["damage"], a["spread_moving"], a["concealment"] );
}

对它运行 awk:

awk -f parse.awk input.txt

并得到你的输出:

                              id,   sight,   value,    zoom,    recoil,   spread_moving,   extra,    upper_receiver,    barrel,    damage,   spread_moving,     concealment
             bm_wp_upg_o_t1micro,       x,       3,       3,         1,              -1,        ,                  ,          ,          ,              -1,
  bm_wp_upg_o_marksmansight_rear,       x,       3,       1,         1,                ,        ,                  ,          ,          ,                ,
 bm_wp_upg_o_marksmansight_front,        ,       1,        ,          ,                ,       x,                  ,          ,          ,                ,
    bm_wp_m4_upper_reciever_edge,        ,       3,        ,         1,                ,        ,                  ,          ,          ,                ,
   bm_wp_m4_upper_reciever_round,        ,       1,        ,          ,                ,        ,                  ,          ,          ,                ,
            bm_wp_m4_uupg_b_long,        ,       4,        ,          ,              -2,        ,                  ,         x,         1,              -2,              -2

请注意,对于 sight 这类似乎只有“存在/不存在”之分的字段,我选择仅用 “x” 来标记。您可以换成任何您想要的符号。

如果您使用的是 Linux 或 Macintosh,则应该有可用的 awk。如果您使用的是 Windows,则必须安装它。

于 2013-08-22T21:46:08.720 回答
1

我确实制作了另一个 awk 版本。我认为这应该更容易阅读。从文件中读取所有值/列以使其尽可能动态。

# Pivot "id, type, key = value, ..." records read from "file" into a
# fixed-width table with one column per key found in the data.
# Rows and columns are printed in the order they first appear in the input
# (a plain "for (k in array)" loop visits keys in an unspecified order).
# Cells a record does not define are printed as 0.
awk -F, '
    # Return s with every blank, tab and CR removed.
    function squeeze(s) { gsub(/[ \t\r]+/, "", s); return s }

    # Blank lines hold no record; skip them instead of creating an empty id.
    squeeze($0) == "" { next }

    {
        id = squeeze($1)
        if (!(id in ID))
            order[++nid] = id           # remember first-seen row order
        ID[id] = squeeze($2)            # column 2 is the type shown next to the id
        for (i = 3; i <= NF; i++)       # every remaining field is key = value
        {
            field = squeeze($i)
            if (field == "")
                continue                # empty field, e.g. after a trailing comma
            split(field, a, "=")        # a[1] is the key, a[2] the value
            if (!(a[1] in COLUMN))
            {
                COLUMN[a[1]] = 1
                colname[++ncol] = a[1]  # remember first-seen column order
            }
            DATA[id, a[1]] = a[2]       # cell value indexed by (id, key)
        }
    }
END {
    printf "%49s   ", "info"            # header: label above the id/type pair
    for (c = 1; c <= ncol; c++)
        printf "%15s", colname[c]       # header: one label per key
    print ""
    for (r = 1; r <= nid; r++)
    {
        id = order[r]
        printf "%32s %16s ", id, ID[id]
        for (c = 1; c <= ncol; c++)
            printf "%14s ", DATA[id, colname[c]] + 0   # +0 turns a missing cell into 0
        print ""
    }
}' file

输出

                                             info            spread         recoil           zoom    concealment  spread_moving         damage          value
   bm_wp_m4_upper_reciever_round   upper_reciever              0              0              0              0              0              0              1
            bm_wp_m4_uupg_b_long           barrel              1              0              0             -2             -2              1              4
  bm_wp_upg_o_marksmansight_rear            sight              1              1              1              0              0              0              3
 bm_wp_upg_o_marksmansight_front            extra              0              0              0              0              0              0              1
    bm_wp_m4_upper_reciever_edge   upper_reciever              0              1              0              0              0              0              3
             bm_wp_upg_o_t1micro            sight              0              1              3              0             -1              0              3
于 2013-08-23T07:55:31.497 回答
0

请采用 Ethan 的回答——下面这个只是我自己在找乐子。(是的,这说明我有点怪!)

awk 脚本

# Reformat "id, type, key = value, ..." records from the file "data" into an
# aligned, comma-separated table.  The set of columns is discovered from the
# data (in first-seen order) and every column is as wide as its widest cell
# or its header, whichever is longer.  Blank lines are ignored and any amount
# of spacing around "=" is accepted.
awk '
    # Strip leading and trailing blanks (and a stray CR) from a string.
    function trim(s) { gsub(/^[ \t\r]+|[ \t\r]+$/, "", s); return s }

    # Store text v as the cell of row r, column c, widening the column if needed.
    function put(r, c, v) {
        line[r, c] = v
        if (length(v) > f_width[c])
            f_width[c] = length(v)
    }

    BEGIN {
        # f_idx[field] holds the column number c for a field=value item
        # f_name[c]    holds the column names
        # f_width[c]   holds the width of the widest value (or the field name)
        # f_fmt[c]     holds the printf format for column c
        # n            is the number of columns known so far
        # rows         is the number of data rows stored so far
        FS = " *, *"; n = 2; rows = 0
        f_name[0] = "id";   f_width[0] = length(f_name[0])
        f_name[1] = "type"; f_width[1] = length(f_name[1])
    }

    # A blank line is not a record; skipping it avoids an all-empty row.
    trim($0) == "" { next }

    {
        rows++
        put(rows, 0, trim($1))
        put(rows, 1, trim($2))
        for (i = 3; i <= NF; i++)
        {
            # Split on "=" with optional blanks on either side, so that
            # "zoom = 3", "zoom=3" and "zoom =  3" are all understood.
            nparts = split($i, fv, "[ \t]*=[ \t]*")
            key = trim(fv[1])
            if (key == "")
                continue            # empty field, e.g. after a trailing comma
            if (!(key in f_idx))
            {
                # First sighting of this key: give it the next free column.
                f_idx[key] = n
                f_name[n] = key
                f_width[n] = length(key)
                n++
            }
            val = (nparts > 1) ? fv[2] : ""
            gsub(/[ \t\r]/, "", val)
            put(rows, f_idx[key], val)
        }
    }

    END {
        # Each format prints a separator (empty for the first column)
        # followed by the cell right-aligned to the column width.
        for (j = 0; j < n; j++)
            f_fmt[j] = "%s%" f_width[j] "s"

        pad = ""
        for (j = 0; j < n; j++)
        {
            printf f_fmt[j], pad, f_name[j]
            pad = ","
        }
        printf "\n"

        for (i = 1; i <= rows; i++)
        {
            pad = ""
            for (j = 0; j < n; j++)
            {
                printf f_fmt[j], pad, line[i, j]
                pad = ","
            }
            printf "\n"
        }
    }' data

该脚本会根据在文件中找到的数据自行调整。它将列标题“id”分配给输入的第 1 列,将“type”分配给第 2 列。对于第 3..N 列中的每一项,它将数据拆分为键(存入 fv[1])和值(存入 fv[2])。如果之前没有见过该键,则为其分配一个新的列号,并将该键存储为列名,以键的长度作为初始列宽。然后将该值存储到该行的相应列中。

读取所有数据后,脚本知道列标题将是什么。然后它可以创建一组格式字符串。然后它打印标题和所有数据行。如果您不想要固定宽度的输出,那么您可以大大简化脚本。可以对此脚本进行一些(大部分是次要的)简化。

数据文件

bm_wp_upg_o_t1micro, sight, value = 3, zoom = 3, recoil = 1, spread_moving = -1 
bm_wp_upg_o_marksmansight_rear, sight, value = 3, zoom = 1, recoil = 1, spread = 1 
bm_wp_upg_o_marksmansight_front, extra, value = 1 
bm_wp_m4_upper_receiver_edge, upper_receiver, value = 3, recoil = 1 
bm_wp_m4_upper_receiver_round, upper_receiver, value = 1 
bm_wp_m4_uupg_b_long, barrel, value = 4, damage = 1, spread = 1, spread_moving = -2, concealment = -2

输出

                             id,          type,value,zoom,recoil,spread_moving,spread,damage,concealment
            bm_wp_upg_o_t1micro,         sight,    3,   3,     1,           -1,      ,      ,           
 bm_wp_upg_o_marksmansight_rear,         sight,    3,   1,     1,             ,     1,      ,           
bm_wp_upg_o_marksmansight_front,         extra,    1,    ,      ,             ,      ,      ,           
   bm_wp_m4_upper_receiver_edge,upper_receiver,    3,    ,     1,             ,      ,      ,           
  bm_wp_m4_upper_receiver_round,upper_receiver,    1,    ,      ,             ,      ,      ,           
           bm_wp_m4_uupg_b_long,        barrel,    4,    ,      ,           -2,     1,     1,         -2
于 2013-08-23T05:59:27.260 回答