0

这是我之前在 SO 上发布的帖子的后续问题。

我正在尝试制作人口统计数据的频数表,涉及种族(race)、性别(sex)和族裔(ethnicity)。其中一张表是某项研究中西班牙裔参与者的"种族 × 性别"交叉表。然而,到目前为止研究中还没有西班牙裔参与者,因此该表的所有单元格都将为零,但我们仍然必须报告这张表。

这可以在R中完成,但到目前为止,我还没有找到 SAS 的解决方案。示例数据如下。


/* Sample participant data: one row per participant with numeric codes */
/* for race, ethnicity (eth) and sex. Every row below has eth = 2, so   */
/* the sample contains no eth = 1 participants.                         */
DATA race;
    INPUT race eth sex;
    DATALINES;
1   2   1
1   2   1
1   2   2
2   2   1
2   2   2
2   2   1
3   2   2
3   2   2
3   2   1
4   2   2
4   2   1
4   2   2
;
RUN;




/* Shell data set holding every race x eth x sex combination            */
/* (7 * 3 * 2 = 42 rows). The PROC TABULATE steps in this file pass it  */
/* through CLASSDATA= together with PRINTMISS.                          */
DATA class;
    DO race = 1 TO 7;
        DO eth = 1 TO 3;
            DO sex = 1 TO 2;
                OUTPUT;
            END;
        END;
    END;
RUN;



/* Display formats for the numeric demographic codes.                   */
/*   frace. : race codes 1-7                                            */
/*   feth.  : ethnicity codes 1-3                                       */
/*   fsex.  : sex codes 1-2                                             */
/* Fix: the label for race code 4 was misspelled ("Hawiian"); it is now */
/* "Hawaiian" so the printed report rows are spelled correctly.         */
proc format;

    value   frace   1 = "American Indian / AK Native"
                    2 = "Asian"
                    3 = "Black or African American"
                    4 = "Native Hawaiian or Other PI"
                    5 = "White"
                    6 = "More than one race"
                    7 = "Unknown or not reported" ;

    value   feth    1 = "Hispanic or Latino"
                    2 = "Not Hispanic or Latino"
                    3 = "Unknown or Not reported" ;

    value   fsex    1 = "Male"
                    2 = "Female" ;

run;






/* Ethnicity (rows) by sex (columns).                                   */
/* CLASSDATA=class supplies the full list of class-level combinations;  */
/* PRINTMISS and MISSTEXT='0' are used so empty cells print as 0.       */
PROC TABULATE DATA=race CLASSDATA=class MISSING;
    FORMAT race frace. eth feth. sex fsex.;
    CLASS race eth sex;
    TABLE eth, sex / PRINTMISS MISSTEXT='0';
RUN;



/* Race (rows) by sex (columns).                                        */
/* CLASSDATA=class supplies the full list of class-level combinations;  */
/* PRINTMISS and MISSTEXT='0' are used so empty cells print as 0.       */
PROC TABULATE DATA=race CLASSDATA=class MISSING;
    FORMAT race frace. eth feth. sex fsex.;
    CLASS race eth sex;
    TABLE race, sex / PRINTMISS MISSTEXT='0';
RUN;



/* Race (rows) by sex (columns), restricted to Hispanic participants    */
/* (eth = 1).                                                           */
/*                                                                      */
/* Known problem, kept here on purpose to demonstrate it: no row of the */
/* sample data has eth = 1, so the WHERE subset is empty. The log then  */
/* reports that a logical page with only missing values has been        */
/* deleted, and no table is printed, despite PRINTMISS and MISSTEXT.    */
/*                                                                      */
/* Fix applied: the remarks are written as block comments. The          */
/* asterisk-style comment statement must not contain an unmatched       */
/* quotation mark, and the earlier remark had an apostrophe in it.      */
proc tabulate data = race missing classdata=class ;
where eth = 1 ;
class  race eth sex ;
table race, sex / misstext = '0' printmiss;
format race frace. eth feth.  sex fsex. ;
run;

我知道这段代码确实行不通,因为我用 where eth = 1 做了筛选(没有任何满足该条件的观测……)。改用 by eth 分组运行也不起作用。

非常感谢任何指导...

4

3 回答 3

1

我认为最简单的方法是在数据中添加一行带有缺失值的记录。关于如何在更大规模上这样做,可以参考下面这篇论文中的建议:

http://www.nesug.org/Proceedings/nesug11/pf/pf02.pdf

PROC FREQ 有一个 SPARSE 选项,它可以列出表中各变量取值的所有可能组合(包括数据中没有出现的组合),但看起来它并不能完全满足您的需求。

于 2013-04-18T18:36:18.290 回答
1

看起来我们在 Westat 的好朋友已经解决了这个问题。此处显示了该解决方案的描述。

为方便起见,代码转载如下;但如需引用,请引用原文。


/* Display formats for the coded enrollment variables:                  */
/*   ethnicf. : ethnic category codes 1-3                               */
/*   racef.   : racial category codes 1-7                               */
/*   gndrf.   : sex/gender codes 1-3                                    */
proc format;

    value ethnicf
        1 = 'Hispanic or Latino'
        2 = 'Not Hispanic or Latino'
        3 = 'Unknown (Individuals Not Reporting Ethnicity)'
    ;

    value racef
        1 = 'American Indian or Alaska Native'
        2 = 'Asian'
        3 = 'Native Hawaiian or Other Pacific Islander'
        4 = 'Black or African American'
        5 = 'White'
        6 = 'More Than One Race'
        7 = 'Unknown or Not Reported'
    ;

    value gndrf
        1 = 'Male'
        2 = 'Female'
        3 = 'Unknown or Not Reported'
    ;

run;



/* Shell data set with every ethcat x ethlbl x racelbl x gender         */
/* combination (2 * 3 * 7 * 3 = 126 rows). It is passed to              */
/* PROC TABULATE through CLASSDATA= in this file.                       */
data shelldata;
    /* The FORMAT statement stays first, exactly where it was. */
    format ethlbl ethnicf. racelbl racef. gender gndrf.;

    do ethcat = 1, 2;
        do ethlbl = 1, 2, 3;
            do racelbl = 1, 2, 3, 4, 5, 6, 7;
                do gender = 1, 2, 3;
                    output;
                end;
            end;
        end;
    end;
run;



/* Sample enrollment records. pt is read from columns 1-3; the coded    */
/* ethlbl, gender and racelbl values follow as list input. Every row    */
/* has ethlbl = 2.                                                      */
data test;
    input pt $ 1-3
          ethlbl
          gender
          racelbl;
    datalines;
x1 2 1 5
x2 2 1 5
x3 2 1 5
x4 2 1 5
x5 2 1 5
x6 2 2 2
x7 2 2 2
x8 2 2 5
x9 2 2 4
x10 2 2 4
;
run;





/* Analysis data set: copies test, derives ethcat (1 when ethlbl = 1,   */
/* otherwise 2, including when ethlbl is missing) and attaches display  */
/* formats and labels.                                                  */
data enroll;
    set test;

    ethcat = ifn(ethlbl = 1, 1, 2);

    format ethlbl  ethnicf.
           racelbl racef.
           gender  gndrf.;

    label ethlbl  = 'Ethnic Category'
          racelbl = 'Racial Categories'
          gender  = 'Sex/Gender';
run;




/* TAB_WHERE: print the racial category by sex/gender enrollment table  */
/* for the ethcat = 1 subset of ENROLL, even when that subset is empty. */
/*                                                                      */
/* Takes no parameters. Reads data sets ENROLL and SHELLDATA and the    */
/* formats racef. and gndrf.; defines the format allzero.; sets         */
/* title1-title3.                                                       */
/*                                                                      */
/* How the empty subset is handled: a WHERE statement that selects no   */
/* rows cannot be used, so in that case the WHERE is dropped and every  */
/* cell is displayed through allzero., which prints any numeric value   */
/* as 0.                                                                */
/*                                                                      */
/* Changes from the earlier version:                                    */
/*   - working macro variables are declared %local, so the macro does   */
/*     not overwrite same-named global macro variables;                 */
/*   - the subset condition is defined once and reused;                 */
/*   - the title3 text is a single-line literal, with a space before    */
/*     its opening quote, instead of a literal broken across two lines. */
%MACRO TAB_WHERE;

    %local numobs fmt str subset;

    /* Subset condition shared by the count query and the WHERE statement. */
    %let subset = ethcat = 1;

    /* Count the observations that meet the subset condition. */
    PROC SQL noprint;
        select count(*)
            into :numobs
            from enroll
            where &subset;
    QUIT;

    /* Format that displays all numeric values as zero. */
    PROC FORMAT;
        value allzero low-high='     0';
    RUN;

    %if &numobs = 0 %then %do;
        /* No cases in the subset: WHERE cannot be used, so tabulate    */
        /* all rows and print every cell value as zero.                 */
        %let fmt = allzero.;
        %let str = ;
    %end;
    %else %do;
        %let fmt = 8.0;
        %let str = where &subset;
    %end;

    PROC TABULATE data=enroll classdata=shelldata missing format=&fmt;
        /* Resolves to the WHERE statement, or to a null statement when empty. */
        &str;
        format racelbl racef. gender gndrf.;
        class racelbl gender;
        classlev racelbl gender;
        keyword n pctn all;
        tables (racelbl all='Racial Categories: Total of Hispanic or Latinos'),
               gender='Sex/Gender'*N=' ' all='Total'*n=''
               / printmiss misstext='0' box=[LABEL=' '];
        title1 font=arial color=darkblue h=1.5 'Inclusion Enrollment Report';
        title2 ' ';
        title3 font=arial color=darkblue h=1 ' PART B. HISPANIC ENROLLMENT REPORT: Number of Hispanic or Latinos Enrolled to Date (Cumulative)';
    RUN;

%MEND TAB_WHERE;

%TAB_WHERE

于 2013-04-18T18:59:54.100 回答
0

我发现这篇论文信息量很大:

《Oh No, a Zero Row: 5 Ways to Summarize Absolutely Nothing》(哦不,出现了零行:汇总"空无一物"的 5 种方法)

proc means 中的 preloadfmt 选项(方法 5)是我最喜欢的。一旦创建了必要的格式,就无需再添加虚拟数据。奇怪的是,他们至今还没有把这个选项加到 proc freq 中。

于 2013-04-22T13:24:57.337 回答