3

我正在将一个 .txt 文件读入 SAS,它使用“|” 作为分隔符。问题是有一列使用“|” 作为一个单词分隔符,而不是像分隔符一样,这​​需要在一列中。

例如,txt 文件如下所示:

apple|fruit|Healthy|choices|of|food|12|2012|chart

需要在 SAS 数据集中看起来像这样:

apple | fruit | Healthy choices of Food | 12 | 2012 | chart

如何消除“|” 在“健康的食物选择”之间?

4

4 回答 4

2

我认为这会做你想要的:

data tmp1;
  /* Load each sample record whole into TMP (up to 100 characters).      */
  /* The records contain no blanks, so list input reads one record as a  */
  /* single token.                                                       */
  input tmp :$100.;
  datalines;
apple|fruit|Healthy|choices|of|food|12|2012|chart
apple|fruit|Healthy|choices|of|food|and|lots|of|other|stuff|12|2012|chart
;
run;

data tmp2;
  set tmp1;

  /* Number of pipes in the record: full length minus length without pipes. */
  num_delims = length(tmp) - length(compress(tmp, "|"));

  /* A well-formed record has 6 fields, i.e. 5 real delimiters; any       */
  /* additional pipes sit between the words of the third field.           */
  expected_delims = 5;
  extra_delims = num_delims - expected_delims;

  length new_var $100;

  /* Walk the pipe-separated words until SCAN returns a blank word.       */
  do i = 1 by 1 while (scan(tmp, i, "|") ne "");
    /* Words inside the third field (and the final word) are followed by  */
    /* the placeholder "#"; a literal blank would be removed by TRIM on   */
    /* the next pass. All other words keep a real "|" after them.         */
    if i > 2 and (i <= extra_delims + 2 or i > num_delims) then
      new_var = trim(new_var) || scan(tmp, i, "|") || "#";
    else
      new_var = trim(new_var) || scan(tmp, i, "|") || "|";
  end;

  /* Turn placeholders into blanks and drop the leading blank that        */
  /* TRIM() of the initially empty NEW_VAR introduced.                    */
  new_var = left(tranwrd(new_var, "#", " "));
run;
于 2012-11-15T12:22:54.757 回答
0

稍微扩展一下 Itzy 的答案,这是另一种可能的解决方案:

data want;
   /*
    * Read a pipe-delimited file whose third field (the description) may
    * itself contain pipes. The first two fields are taken from the left,
    * the last three from the right, and everything in between becomes the
    * description with its pipes replaced by blanks.
    */

   /* Define variables */
   attrib item    length=$10 label='Item';
   attrib class   length=$10 label='Family';
   attrib desc    length=$80 label='Item Description';
   attrib count   length=8   label='Some number';
   attrib year    length=$4  label='Year';
   attrib somevar length=$10 label='Some variable';

   length countc $8; /* A temp variable */

   infile 'c:\temp\delimited_temp.txt' lrecl=1000 truncover;
   input;   /* Load the raw record into _INFILE_ without parsing it */

   /* 'm' keeps empty fields as words; 'o' processes the delimiter list once */
   item    = scan(_infile_,1,'|','mo');
   class   = scan(_infile_,2,'|','mo');
   countc  = scan(_infile_,-3,'|','mo');  /* Temp var for numeric field */
   count   = inputn(countc,'8.');         /* Re-read the numeric field */
   year    = scan(_infile_,-2,'|','mo');
   somevar = scan(_infile_,-1,'|','mo');

   /*
    * Locate the description in the raw record itself instead of deriving
    * offsets from LENGTH() of the stored variables: those variables are
    * truncated to their declared lengths and LENGTH() returns 1 for a
    * blank value, either of which shifts the SUBSTR window.
    *   desc_pos  = column where the 3rd field starts
    *   count_pos = column where the 3rd-from-last field starts
    * The description ends two columns before count_pos (one for the
    * delimiter that precedes the count field).
    */
   call scan(_infile_, 3, desc_pos, desc_len, '|', 'mo');
   call scan(_infile_, -3, count_pos, count_len, '|', 'mo');

   /* Guard: short or empty-description records would give SUBSTR a */
   /* non-positive length; leave DESC blank in that case.           */
   if desc_pos > 0 and count_pos - desc_pos - 1 > 0 then
      desc = tranwrd(
                substr(_infile_, desc_pos, count_pos - desc_pos - 1)
               ,'|',' ');

   drop countc desc_pos desc_len count_pos count_len;
run;

在这种情况下,关键是直接读取您的文件并自己处理分隔符。这可能很棘手,并且要求您的数据文件与描述的完全相同。更好的解决方案是返回给提供此数据的人,并要求他们以更合适的形式将其提供给您。祝你好运!

于 2012-11-15T16:11:16.100 回答
0

这不是特别优雅,但它会起作用:

data tmp;
 /* Read the sample record with formatted input into the 50-character TMP. */
 input tmp $50.;
 datalines;
apple|fruit|Healthy|choices|of|food|12|2012|chart
;
run;

data tmp;
 set tmp;

 /* Fixed fields: the first two words from the left, the last three */
 /* words from the right (negative counts scan right-to-left).      */
 var1 = scan(tmp,1,'|');
 var2 = scan(tmp,2,'|');
 var4 = scan(tmp,-3,'|');
 var5 = scan(tmp,-2,'|');
 var6 = scan(tmp,-1,'|');

 /* Description = the record with the leading "var1|var2" and the     */
 /* trailing "var4|var5|var6" blanked out, then pipes turned to       */
 /* blanks. NOTE: TRANWRD replaces every occurrence, so this assumes  */
 /* those prefix/suffix strings do not recur inside the description.  */
 var3 = tranwrd(tmp,trim(var1)||"|"||trim(var2),"");
 var3 = tranwrd(var3,trim(var4)||"|"||trim(var5)||"|"||trim(var6),"");

 /* The removed prefix leaves a blank and the pipe after it becomes a */
 /* second one; LEFT() shifts the text so VAR3 has no leading blanks. */
 var3 = left(tranwrd(var3,"|"," "));
 run;
于 2012-11-15T02:05:21.563 回答
0

另一种可能的解决方法。

data tmp;
/* Split each pipe-delimited record into six variables v1-v6. Fields 1-2  */
/* come from the left, fields 4-6 are the last three words, and field 3   */
/* collects every word in between, joined with single blanks.             */
infile '/path/to/textfile'; 
input tmp :$100.;
array varlst (*) $30 v1-v6;
a=countw(tmp,'|');   /* total number of pipe-separated words */
do i=1 to dim(varlst);
 if i<=2 then
   varlst(i) = scan(tmp,i,'|');
 else if i>=4 then
   /* i=4,5,6 map to words a-2, a-1, a */
   varlst(i) = scan(tmp,a-(dim(varlst)-i),'|');
 /* i=3: words 3 .. a-3. The upper bound must be a-3, the word just     */
 /* before field 4; stopping one earlier drops the last word of the     */
 /* description.                                                        */
 else do j=3 to a-(dim(varlst)-i);
   varlst(i)=catx(' ', varlst(i),scan(tmp,j,'|'));
   end;
 end;
drop tmp a i j; 
run;
于 2012-11-15T17:14:41.630 回答