一种做法如下。您需要使用 GNU awk(gawk),因为脚本使用 delete 语句一次性清空整个数组。如果您的 awk 在这一步报错,则需要改用 for 循环逐个删除数组中的每个键。
假设文件 infile 的内容如下:
land; road; car (blue); 1956
land; road; car (blue); 1956 ; car (yellow); 1995
land; road; car (blue); 1956 ; car (yellow); 1995; car (red); 1979
air; -; plane (black); 1984
air; -; plane (black); 1984; helicopter (red); 1998
并且 awk 程序保存在文件 script.awk 中:
BEGIN {
    ## Input fields are separated by a ";" with optional whitespace around it.
    FS = "[[:space:]]*;[[:space:]]*"
    ## Output fields are joined by a ";" followed by one space.
    OFS = "; "
}

## Lines are grouped by their first two fields. Only the LAST line of each
## group is printed, and a line is known to be the last one only when a
## different key (or the end of input) shows up. "keys" therefore buffers
## the most recent line of the group currently being read.
{
    ## The key is the first two fields, normalised with OFS.
    key = $1 OFS $2

    ## A new key closes the previous group: print it and empty the buffer.
    ## NR (not FNR) is used so that a group is also flushed correctly when
    ## the input spans several files; on the very first record there is
    ## nothing buffered yet, so nothing is flushed.
    if ( NR > 1 && !( key in keys ) ) {
        order_and_print_vehicles_by_date(keys)
        ## Deleting a whole array in one statement requires GNU awk
        ## (or another awk that supports it).
        delete keys
    }

    ## Remember this line as the current candidate for its group.
    keys[ key ] = $0
}

END {
    ## Flush the last group, unless the input was empty.
    if ( NR > 0 ) {
        order_and_print_vehicles_by_date(keys)
    }
}
## Print one output line for the group buffered in "keys": the key (first two
## fields) followed by its "vehicle; date" pairs, ordered by date from the
## highest to the lowest. Pairs with equal dates keep their input order.
##
## Parameters:
##   keys  array expected to hold a single element: index = key of the group,
##         value = the full input line to print. If it is empty, nothing is
##         printed.
## Every other parameter is a local variable (awk's only way to declare
## locals); callers pass just "keys".
function order_and_print_vehicles_by_date(keys, ordered_line, dates, vehicles,    k, line, found, data, n, i, j, count, order) {
    ## "keys" has only one key, get it together with its line.
    found = 0
    for ( k in keys ) {
        line = keys[ k ]
        found = 1
    }
    ## Nothing buffered (e.g. empty input): print nothing at all.
    if ( !found ) {
        return
    }

    ## Remove the key (first two fields) from the line. If the line has no
    ## third field there are no vehicles to list.
    if ( !sub( /^([^;]*;){2}[[:space:]]*/, "", line ) ) {
        line = ""
    }
    ## Drop trailing whitespace / separators so no empty last field appears.
    sub( /[[:space:];]+$/, "", line )

    ## Split into fields, discarding the whitespace on BOTH sides of each ";"
    ## so that values such as "1956 " do not keep a stray blank.
    n = split( line, data, /[[:space:]]*;[[:space:]]*/ )

    ## Odd positions are vehicles, even positions are their dates. Pairs are
    ## stored by position (not by date) so that two vehicles sharing a date
    ## do not overwrite each other. "order" is kept sorted while inserting:
    ## order[1] is the pair with the highest date.
    count = 0
    for ( i = 1; i <= n; i += 2 ) {
        ++count
        vehicles[ count ] = data[ i ]
        ## A vehicle without a date gets an empty date (sorts as 0).
        dates[ count ] = ( i < n ) ? data[ i + 1 ] : ""

        ## Stable insertion sort, numeric, descending: shift only entries
        ## with a strictly lower date.
        for ( j = count; j > 1 && ( dates[ order[j - 1] ] + 0 ) < ( dates[ count ] + 0 ); j-- ) {
            order[ j ] = order[ j - 1 ]
        }
        order[ j ] = count
    }

    ## Build the output line: every field is preceded by OFS, so there is no
    ## trailing separator to clean up afterwards.
    ordered_line = ""
    for ( j = 1; j <= count; j++ ) {
        ordered_line = ordered_line OFS vehicles[ order[j] ]
        if ( dates[ order[j] ] != "" ) {
            ordered_line = ordered_line OFS dates[ order[j] ]
        }
    }

    printf "%s%s\n", k, ordered_line
}
像这样运行它:
awk -f script.awk infile
这会产生:
land; road; car (yellow); 1995; car (red); 1979; car (blue); 1956
air; -; helicopter (red); 1998; plane (black); 1984