感谢 Inquistive Mind 帮助我生成唯一的代理键。下面是我测试过并且运行良好的 Pig 脚本:
-- Purpose: give every distinct input row a surrogate key of the form 'SCN<n>'
-- and attach that key back onto every original row, so duplicate rows share
-- the same key.

-- Load the comma-delimited input. No schema is declared, so all later steps
-- address fields by position ($0, $1, $2).
A = LOAD '/user/root5/data3.txt' USING PigStorage(',');
-- Collapse duplicate rows so each unique row is numbered exactly once.
B = DISTINCT A;
-- RANK without a BY clause prepends a sequential row number as the new $0;
-- the original fields shift to $1..$3.
C = RANK B;
-- Build the key by prefixing the rank with 'SCN'; keep the three data fields.
-- NOTE(review): the rank column is passed to CONCAT without an explicit cast;
-- if a Pig version rejects this, (chararray)$0 should be equivalent - confirm.
D = FOREACH C GENERATE CONCAT('SCN',$0),$1,$2,$3;
-- Join the original (non-deduplicated) rows to the keyed rows on all three
-- data fields, so each duplicate in A picks up the key of its distinct row.
-- NOTE(review): presumably rows with a null in any join field are dropped by
-- this inner join - verify against the Pig JOIN documentation.
E = JOIN A BY ($0,$1,$2),D BY ($1,$2,$3);
-- The joined tuple is A's three fields followed by D's four, so $3 is the
-- key; emit the key first, then the original three fields.
F = FOREACH E GENERATE $3, $0, $1, $2;
-- Print the final keyed rows.
DUMP F;
每个步骤的输出如下:
DUMP A;
(20000,newyork,john)
(30000,sydney,joseph)
(60000,delhi,mike)
(20000,newyork,john)
(30000,sydney,mike)
(60000,delhi,mike)
DUMP B;
(20000,newyork,john)
(30000,sydney,mike)
(30000,sydney,joseph)
(60000,delhi,mike)
DUMP C;
(1,20000,newyork,john)
(2,30000,sydney,mike)
(3,30000,sydney,joseph)
(4,60000,delhi,mike)
DUMP D;
(SCN1,20000,newyork,john)
(SCN2,30000,sydney,mike)
(SCN3,30000,sydney,joseph)
(SCN4,60000,delhi,mike)
DUMP E;
(20000,newyork,john,SCN1,20000,newyork,john)
(20000,newyork,john,SCN1,20000,newyork,john)
(30000,sydney,mike,SCN2,30000,sydney,mike)
(30000,sydney,joseph,SCN3,30000,sydney,joseph)
(60000,delhi,mike,SCN4,60000,delhi,mike)
(60000,delhi,mike,SCN4,60000,delhi,mike)
DUMP F;
(SCN1,20000,newyork,john)
(SCN1,20000,newyork,john)
(SCN2,30000,sydney,mike)
(SCN3,30000,sydney,joseph)
(SCN4,60000,delhi,mike)
(SCN4,60000,delhi,mike)