0

我有一个coaches_assistants使用以下结构调用的 SAS 数据集。每个 总是只有两条记录TeamID

TeamID     Team_City    CoachCode
123        Durham       Head_242
123        Durham       Assistant_876
124        London       Head_876
124        London       Assistant_922
125        Bath         Head_667
125        Bath         Assistant_786
126        Dover        Head_544
126        Dover        Assistant_978
...        ...          ....

我想做的是创建一个带有额外字段的数据集,AssistantCode并使其看起来像:

TeamID     Team_City    HeadCode   AssistantCode
123        Durham       242        876
124        London       876        922
125        Bath         667        786
126        Dover        544        978
...        ...          ...        ...

如果可能,我想在单个 DATA 步骤中执行此操作(尽管我认识到我可能首先需要 PROC SORT 步骤)。我知道如何在 python 或 ruby​​ 或任何传统的脚本语言中做到这一点,但我不知道如何在 SAS 中做到这一点。

最好的方法是什么?

4

3 回答 3

2

虽然可以在一个数据步中完成,但我通常发现这种问题在 PROC TRANSPOSE 中得到更好的解决。这种方式减少了手动编码,并且对新事物具有更大的灵活性(比如说出现了一个新值“HeadAssistant”,这将立即起作用)。

data have;
length coachcode $25;
input TeamID     Team_City  $  CoachCode $;
datalines;
123        Durham       Head_242
123        Durham       Assistant_876
124        London       Head_876
124        London       Assistant_922
125        Bath         Head_667
125        Bath         Assistant_786
126        Dover        Head_544
126        Dover        Assistant_978
;;;;
run;

data have_t;
set have;
id=scan(coachcode,1,'_');
val = scan(coachcode,2,'_');
keep teamId team_city id val;
run;

proc transpose data=have_t out=want(drop=_name_);
by teamID team_city;
id id;
var val;
run;
于 2013-07-06T18:12:35.590 回答
1

以下是两种可能的解决方案(一种使用请求的数据步骤,另一种使用 PROC SQL):

data have;
   length TeamID $3 Team_City CoachCode $20; 
   input TeamID $ Team_City $ CoachCode $;
   datalines;
123        Durham       Head_242
123        Durham       Assistant_876
124        London       Head_876
124        London       Assistant_922
125        Bath         Head_667
125        Bath         Assistant_786
126        Dover        Head_544
126        Dover        Assistant_978
run;

/* A data step solution */
proc sort data=have;
   by TeamID;
run;

data want1(keep=TeamID Team_City HeadCode AssistantCode);
   /* Define all variables, retain the new ones */
   length TeamID $3 Team_City $20 HeadCode $3 AssistantCode $3; 
   retain HeadCode AssistantCode; 
   set have;
      by TeamID;
   if CoachCode =: 'Head'
      then HeadCode = substr(CoachCode,6,3);
      else AssistantCode = substr(CoachCode,11,3);
   if last.TeamID;
run;

/* An SQL solution */
proc sql noprint;
   create table want2 as
   select TeamID
        , max(Team_City) as Team_City
        , max(CASE WHEN CoachCode LIKE 'Head%'
                   THEN substr(CoachCode,6,3) ELSE ' '
              END) LENGTH=3 as HeadCode
        , max(CASE WHEN CoachCode LIKE 'Assistant%'
                   THEN substr(CoachCode,11,3) ELSE ' '
              END) LENGTH=3 as AssistantCode
   from have
   group by TeamID;
quit;

PROC SQL 的优点是不需要您提前对数据进行排序。

于 2013-07-06T18:27:12.043 回答
0

这假设您已经对数据进行了排序teamID,并且主教练总是排在助理之前。警告:未经测试(我真的需要再次访问 SAS....)

data want (drop=nc coachcode);
    set have;
    length headcode assistantcode $3;
    retain headcode;
    by teamid;
    nc = length(coachcode);
    if substr(coachcode, 1, 4) = 'Head' then
        headcode = substr(coachcode, nc-2, nc);
    else
        assistantcode = substr(coachcode, nc-2, nc);
    if last.teamid;
run;
于 2013-07-06T17:41:40.000 回答