-1

我正在尝试对输入数据进行对数、平方、立方和 logit(对数几率)变换,以便全面了解哪种变换在单变量回归中表现最好。

我在一个包含 1,000 个变量的数据集上尝试了以下代码——它要么报错、要么内存不足,要么根本无法执行。用数组以这种方式批量转换变量是否存在某些限制?

/*Create a table for reference*/
/* Builds WORK.INPUT_DATA with the numeric variables var_1-var_1000,
   which are addressed through the array var_. */
DATA input_data;
    ARRAY var_[*] var_1-var_1000;

    /* NOTE(review): both nested DO loops use the same index variable i, so
       the inner loop overwrites the outer loop's counter. */
    DO i = 1 to 1000;
        DO i = 1 to 1000;
            /* NOTE(review): j is never assigned anywhere in this step, so
               i*j has no defined value here; the inner loop was presumably
               meant to iterate over j - confirm. */
            var_(i)= i*j;
            /* NOTE(review): OUTPUT is inside the inner loop, so an
               observation is written on every inner iteration rather than
               once per outer iteration - confirm that this is intended. */
            output;
        END;
    END;
RUN;

/*Log, square, cubic, logit transform all variables*/
/* Reads WORK.INPUT_DATA and, for each of the 1000 input variables, assigns
   four derived values to the new variables log_N, logit_N, sq_N and
   cubic_N. */
DATA input_transform;
    SET input_data;
    /* NOTE(review): the arrays below are named var and log, which are also
       names of SAS functions. Per the ARRAY statement documentation, once an
       array carries a function's name, parenthesised references to that name
       are treated as array references for the rest of the DATA step. The
       right-hand log(...) calls in the loop would therefore index the array
       log instead of calling the LOG function - confirm against the SAS
       log. */
    ARRAY var[*]    var_1-var_1000;
    ARRAY log[*]    log_1-log_1000;
    ARRAY logit[*]  logit_1-logit_1000;
    ARRAY sq[*]     sq_1-sq_1000;
    ARRAY cubic[*]  cubic_1-cubic_1000;

    /* One pass over the 1000 variables of the current observation. */
    DO i = 1 to 1000;
        log(i)      = log(var(i));
        /* Intended as the logit, log(x / (1 - x)); mathematically this is
           only defined for 0 < x < 1. */
        logit(i)    = log((var(i))/(1-var(i)));
        sq(i)       = var(i)**2;
        cubic(i)    = var(i)**3;
    END;
RUN;

一个包含 5,000 个变量的新数据集,其中每个变量都带有各自的变换结果。

4

2 回答 2

1

您在两层嵌套的 DO 循环中都使用了 I 作为索引变量,这很可能把循环搞乱了。

此外,您的第一个 DATA 步会写出 1,000,000 条观测、1,002 个变量,而且只填充了“数组”的左下三角部分。您真的想把 OUTPUT 语句放在循环内部吗?

于 2019-02-15T12:49:14.627 回答
0

只要代码写得正确,这样做应该没有问题。下面是一个示例及其日志。

/* Turn on NOTE messages so the step summaries appear in the SAS log. */
option notes;
/* Macro variable used below both as the number of var_ variables and as
   the number of observations generated. */
%let size=1000;

/*Create a table for reference*/
/* Builds WORK.INPUT_DATA: the outer loop over i produces one observation
   per iteration, and the inner loop over j sets var_j to i*j. */
DATA input_data;
    ARRAY var_[*] var_1-var_&size.;

    DO i = 1 to &size.;
        DO j = 1 to &size.;
            var_(j)= i*j;
        END;
        /* OUTPUT follows the inner loop, so exactly one observation is
           written for each value of i. */
        output;
    END;
RUN;

/*Log, square, cubic, logit transform all variables*/
/* Reads WORK.INPUT_DATA and, for each of the &size. input variables,
   assigns four derived values to the new variables log_N, logit_N, sq_N
   and cubic_N. */
DATA input_transform;
    SET input_data;
    /* The array names carry a leading underscore, so they do not coincide
       with the names of the functions (such as LOG) called in the loop. */
    ARRAY _var[*]    var_1-var_&size.;
    ARRAY _log[*]    log_1-log_&size.;
    ARRAY _logit[*]  logit_1-logit_&size.;
    ARRAY _sq[*]     sq_1-sq_&size.;
    ARRAY _cubic[*]  cubic_1-cubic_&size.;

    /* One pass over all &size. variables of the current observation. */
    DO i = 1 to &size.;
        _log(i)      = log(_var(i));
        /* NOTE(review): despite the array's name, this stores the square
           root of the input rather than a logit - confirm intent. */
         _logit(i)    = sqrt(_var(i));
        _sq(i)       = _var(i)**2;
        _cubic(i)    = _var(i)**3;
    END;
RUN;

以下是对应的日志:

1576      option notes;
1577      %let size=1000;
1578
1579      /*Create a table for reference*/
1580      DATA input_data;
1581          ARRAY var_[*] var_1-var_&size.;
1582
1583          DO i = 1 to &size.;
1584              DO j = 1 to &size.;
1585                  var_(j)= i*j;
1586              END;
1587              output;
1588          END;
1589      RUN;

NOTE: The data set WORK.INPUT_DATA has 1000 observations and 1002
      variables.
NOTE: DATA statement used (Total process time):
      real time           0.03 seconds
      cpu time            0.03 seconds


1590
1591      /*Log, square, cubic, logit transform all variables*/
1592      DATA input_transform;
1593          SET input_data;
1594          ARRAY _var[*]    var_1-var_&size.;
1595          ARRAY _log[*]    log_1-log_&size.;
1596          ARRAY _logit[*]  logit_1-logit_&size.;
1597          ARRAY _sq[*]     sq_1-sq_&size.;
1598          ARRAY _cubic[*]  cubic_1-cubic_&size.;
1599
1600          DO i = 1 to &size.;
1601              _log(i)      = log(_var(i));
1602               _logit(i)    = sqrt(_var(i));
1603              _sq(i)       = _var(i)**2;
1604              _cubic(i)    = _var(i)**3;
1605          END;
1606      RUN;

NOTE: There were 1000 observations read from the data set
      WORK.INPUT_DATA.
NOTE: The data set WORK.INPUT_TRANSFORM has 1000 observations and 5002
      variables.
NOTE: DATA statement used (Total process time):
      real time           0.12 seconds
      cpu time            0.10 seconds
于 2019-02-15T16:50:21.177 回答