0

我正在使用 SAS EG 来执行此操作。这是我拥有的输入表:

ref1   ref2  Col1  Col2  Col3  Col4 Col5 Col6
 A     B     41          42              
 D     E           63    65         68
 X     Y     23    25    55         89   99
 K     L     12                22
 U     V                 22    88        11

我希望得到如下输出:

R1  R2  C1  C2  C3  C4 C5 C6
 A   B  41  42              
 D   E  63  65  68
 X   Y  23  25  55  89 99
 K   L  12  22
 U   V  22  88  11

所以基本上我的参考是 ref1 和 ref2,我想去掉每个参考的空白。例如,对于 AB,col2 为空白,因此 col3 的值应移动到 col2。

这是我现在拥有的代码。

/* Asker's original attempt. The CASE logic is intentionally left as posted:
   per the question it does NOT yet produce the desired left-packed output.
   Each Cn is meant to receive the n-th non-blank value of col1..col6;
   blank cells are detected with <> ''. Only C1..C5 are produced here. */
PROC SQL;

CREATE TABLE output AS

SELECT ref1 AS R1,
       ref2 AS R2,

       /* C1: first non-blank value scanning col1 -> col6. */
       CASE
           WHEN col1 <> '' THEN col1
           WHEN col2 <> '' THEN col2
           WHEN col3 <> '' THEN col3
           WHEN col4 <> '' THEN col4
           WHEN col5 <> '' THEN col5
           WHEN col6 <> '' THEN col6
           ELSE ''
       END AS C1,

       /* C2: col2 only when col1 is also filled; otherwise first non-blank of col3..col6. */
       CASE
           WHEN col2 <> '' AND col1 <> '' THEN col2
           WHEN col3 <> '' THEN col3
           WHEN col4 <> '' THEN col4
           WHEN col5 <> '' THEN col5
           WHEN col6 <> '' THEN col6
           ELSE ''
       END AS C2,

       /* C3: col3 only when col1..col2 are filled; otherwise first non-blank of col4..col6. */
       CASE
           WHEN col3 <> '' AND col2 <> '' AND col1 <> '' THEN col3
           WHEN col4 <> '' THEN col4
           WHEN col5 <> '' THEN col5
           WHEN col6 <> '' THEN col6
           ELSE ''
       END AS C3,

       /* C4: col4 only when col2..col3 are filled; otherwise first non-blank of col5..col6. */
       CASE
           WHEN col4 <> '' AND col3 <> '' AND col2 <> '' THEN col4
           WHEN col5 <> '' THEN col5
           WHEN col6 <> '' THEN col6
           ELSE ''
       END AS C4,

       /* C5: col5 only when col2..col4 are filled; otherwise col6 if non-blank. */
       CASE
           WHEN col5 <> '' AND col4 <> '' AND col3 <> '' AND col2 <> '' THEN col5
           WHEN col6 <> '' THEN col6
           ELSE ''
       END AS C5

FROM input;

/* QUIT ends the PROC SQL step. */
QUIT;

这就是我得到的。

R1  R2  C1  C2  C3  C4 C5 
 A   B  41  42  42  42              
 D   E  63  65  68  68 
 X   Y  23  25  55  89 99 
 K   L  12  22  22  22
 U   V  22  88  11  11 11 

这段代码没有给我想要的确切结果,我也认为应该有一种更简单的方法来做到这一点。请指教。

4

4 回答 4

1

[假设值的顺序并不重要,只要缺失值在一侧、非缺失值在另一侧即可] 您可以使用 call sortn 例程。call sortn 对一行中各列的值进行排序,本质上与 PROC SORT 对行进行排序的方式相同。

/* Sample data: two character keys (ref1, ref2) followed by six numeric
   columns col1..col6; a lone '.' in the data lines is read as missing. */
data have;
    infile datalines;
    input ref1 $
          ref2 $
          col1 col2 col3 col4 col5 col6;
    datalines;
 A B 41 . 42 . . .
 D E . 63 65 . 68 .
 X Y 23 25 55 . 89 99
 K L 12 . . 22 . .
 U V . . 22 88 . 11
;
run;

/* Sort the six values of every row in place.
   CALL SORTN only sorts in ascending order, so the array lists the variables
   in reverse (col6 down to col1): ascending across the reversed list gives
   descending order when the row is read from col1 to col6. */
data WANT;
    set have;
    array reversed[6] col6-col1;
    call sortn(of reversed[*]);
run;
于 2013-10-30T16:30:12.040 回答
0

可能有一些聪明的技巧,但乍一看,我会使用数组。

/* Left-pack the non-missing values of Col1-Col6 within each row, keeping
   their original left-to-right order; vacated slots are set to missing. */
data OUTPUT (drop=read_pos write_pos);
    set INPUT;
    array cols{6} Col1-Col6;
    /* count of non-missing values already placed for the current row */
    write_pos = 0;
    do read_pos = 1 to 6;
        if not missing(cols{read_pos}) then do;
            write_pos = write_pos + 1;
            /* a gap lies to the left: move the value into it and blank the old slot */
            if write_pos < read_pos then do;
                cols{write_pos} = cols{read_pos};
                cols{read_pos} = .;
            end;
        end;
    end;
run;

我省略了从 ColX 到 CX 需要编写的重命名语句,但我希望这个细节足够明显。;)

于 2013-10-30T15:34:36.793 回答
0

(我不知道 SAS SQL 是否有 CTE,我想有)

下面的程序首先对交叉表进行规范化(展开为每个单元格一行),然后为非空单元格的值重新计算排名,最后使用新的排名来更新原始表。

(更新不是严格需要的,在 SAS 中为结果创建一个新表可能会更好)

-- create the data
-- Sample cross-table mirroring the question's input: two key columns plus
-- six nullable value columns, where NULL stands for a blank cell.
CREATE TABLE sasmeuk
        ( ref11 CHAR(1)
        , ref2 CHAR(1)
        , col1 INTEGER
        , col2 INTEGER
        , col3 INTEGER
        , col4 INTEGER
        , col5 INTEGER
        , col6 INTEGER
        );
INSERT INTO sasmeuk
( ref11, ref2, col1, col2, col3, col4, col5, col6) VALUES
  ('A', 'B',   41, NULL,   42, NULL, NULL, NULL)
, ('D', 'E', NULL,   63,   65, NULL,   68, NULL) -- col5 is 68 in the question's input table
, ('X', 'Y',   23,   25,   55, NULL,   89,   99)
, ('K', 'L',   12, NULL, NULL,   22, NULL, NULL)
, ('U', 'V', NULL, NULL,   22,   88, NULL,   11)
        ;

-- SELECT * FROM sasmeuk;

-- Left-pack the non-NULL values of col1..col6 for every (ref11, ref2) key,
-- writing the result back into sasmeuk.
WITH unpivoted AS ( -- one row per (key, column position, cell value)
        SELECT s.ref11, s.ref2, v.idx, v.val
        FROM sasmeuk AS s
        CROSS JOIN LATERAL (VALUES
                  (1, s.col1), (2, s.col2), (3, s.col3)
                , (4, s.col4), (5, s.col5), (6, s.col6)
                ) AS v (idx, val)
        )
, ranked AS ( -- keep filled cells only; rnk is the cell's new column position
        SELECT u.ref11, u.ref2, u.idx, u.val
                , rank() OVER (PARTITION BY u.ref11, u.ref2 ORDER BY u.idx) AS rnk
        FROM unpivoted AS u
        WHERE u.val IS NOT NULL
        )
, packed AS ( -- back to one row per key: column N takes the value ranked N
        SELECT k.ref11, k.ref2
                , r1.val AS col1
                , r2.val AS col2
                , r3.val AS col3
                , r4.val AS col4
                , r5.val AS col5
                , r6.val AS col6
        FROM sasmeuk AS k
        LEFT JOIN ranked AS r1 ON r1.ref11 = k.ref11 AND r1.ref2 = k.ref2 AND r1.rnk = 1
        LEFT JOIN ranked AS r2 ON r2.ref11 = k.ref11 AND r2.ref2 = k.ref2 AND r2.rnk = 2
        LEFT JOIN ranked AS r3 ON r3.ref11 = k.ref11 AND r3.ref2 = k.ref2 AND r3.rnk = 3
        LEFT JOIN ranked AS r4 ON r4.ref11 = k.ref11 AND r4.ref2 = k.ref2 AND r4.rnk = 4
        LEFT JOIN ranked AS r5 ON r5.ref11 = k.ref11 AND r5.ref2 = k.ref2 AND r5.rnk = 5
        LEFT JOIN ranked AS r6 ON r6.ref11 = k.ref11 AND r6.ref2 = k.ref2 AND r6.rnk = 6
        )
UPDATE sasmeuk AS dst -- overwrite every row with its packed counterpart
SET col1 = src.col1
        , col2 = src.col2
        , col3 = src.col3
        , col4 = src.col4
        , col5 = src.col5
        , col6 = src.col6
FROM packed AS src
WHERE dst.ref11 = src.ref11 AND dst.ref2 = src.ref2
        ;

-- Show the packed table.
SELECT ref11, ref2, col1, col2, col3, col4, col5, col6
FROM sasmeuk
        ;

更新:下面的代码不太优雅,但更简单:

-- Left-pack col1..col6 using plain UPDATE statements only.
-- Each statement moves a value one slot to the left when that slot is NULL
-- (the SET expressions read the row's pre-update values, so this swaps the
-- value with the NULL on its left).
-- One right-to-left sweep is not enough: it only guarantees that the leftmost
-- target column is final. For example (NULL, 63, 65, NULL, 68, NULL) would
-- stop at (63, NULL, 65, 68, NULL, NULL). The sweep is therefore repeated,
-- each time one column shorter, because the columns to the left are settled.

-- Sweep 1: col1 becomes final.
UPDATE sasmeuk SET col5 = col6, col6 = NULL WHERE col5 IS NULL AND col6 IS NOT NULL;
UPDATE sasmeuk SET col4 = col5, col5 = NULL WHERE col4 IS NULL AND col5 IS NOT NULL;
UPDATE sasmeuk SET col3 = col4, col4 = NULL WHERE col3 IS NULL AND col4 IS NOT NULL;
UPDATE sasmeuk SET col2 = col3, col3 = NULL WHERE col2 IS NULL AND col3 IS NOT NULL;
UPDATE sasmeuk SET col1 = col2, col2 = NULL WHERE col1 IS NULL AND col2 IS NOT NULL;

-- Sweep 2: col2 becomes final.
UPDATE sasmeuk SET col5 = col6, col6 = NULL WHERE col5 IS NULL AND col6 IS NOT NULL;
UPDATE sasmeuk SET col4 = col5, col5 = NULL WHERE col4 IS NULL AND col5 IS NOT NULL;
UPDATE sasmeuk SET col3 = col4, col4 = NULL WHERE col3 IS NULL AND col4 IS NOT NULL;
UPDATE sasmeuk SET col2 = col3, col3 = NULL WHERE col2 IS NULL AND col3 IS NOT NULL;

-- Sweep 3: col3 becomes final.
UPDATE sasmeuk SET col5 = col6, col6 = NULL WHERE col5 IS NULL AND col6 IS NOT NULL;
UPDATE sasmeuk SET col4 = col5, col5 = NULL WHERE col4 IS NULL AND col5 IS NOT NULL;
UPDATE sasmeuk SET col3 = col4, col4 = NULL WHERE col3 IS NULL AND col4 IS NOT NULL;

-- Sweep 4: col4 becomes final.
UPDATE sasmeuk SET col5 = col6, col6 = NULL WHERE col5 IS NULL AND col6 IS NOT NULL;
UPDATE sasmeuk SET col4 = col5, col5 = NULL WHERE col4 IS NULL AND col5 IS NOT NULL;

-- Sweep 5: col5 (and with it col6) becomes final.
UPDATE sasmeuk SET col5 = col6, col6 = NULL WHERE col5 IS NULL AND col6 IS NOT NULL;
于 2013-10-30T15:35:22.893 回答
0

请参阅下面可以正常运行的代码。该解决方案由 JT85 提供。

/* Copy the non-blank values of col1-col6, in their original order, into
   c1-c6 so that all blanks end up on the right.
   NOTE(review): the comparison with ' ' assumes col1-col6 are character
   variables - confirm against the real table.
   The output data set name (want) is a placeholder; the posted snippet had
   no DATA statement, without which the step cannot run. */
data want (drop=i j);
  set test;
  array src_cols col1-col6;
  array dst_cols $ c1-c6;
  /* j is the next free slot in c1-c6; reset for every row */
  j = 1;
  /* visit every source column, col6 included */
  do i = 1 to dim(src_cols);
    if src_cols(i) ne ' ' then do;
      dst_cols(j) = src_cols(i);
      j + 1;
    end;
  end;
run;

谢谢

于 2013-10-31T20:23:43.140 回答