正如 Vasja 所说,使用 cURL 就可以做到这一点。在下面的示例中,我使用了命名管道,而不是通过 systask 调用 curl。下面是代码示例(不过它并不适合胆小的人):
**
** GLOBALS
**
** curl_executable - path of the curl binary that is run through the pipe
** maxchars        - widest line, in characters, read into the XML variable
** userpass        - credentials handed to the curl -u option as user:password
*;
%let curl_executable = d:\xxx\curl.exe;
%let maxchars = 3000;
%let userpass = User:Pass;

data _null_;
  **
  ** USE THIS VERSION IF YOU JUST HAVE A STRAIGHT URL TO RETRIEVE
  **
  ** THE NRSTR CALL MUST BE CLOSED BEFORE THE ENDING QUOTE. IF IT IS LEFT OPEN
  ** THE MACRO PROCESSOR KEEPS SCANNING THE REST OF THE PROGRAM FOR THE
  ** CLOSING PARENTHESIS AND THIS STEP NEVER COMPILES AS INTENDED.
  **
  ** CURL OPTIONS USED -
  **   -k  SKIPS TLS CERTIFICATE VERIFICATION. DROP IT IF THE SITE HAS A VALID CERTIFICATE.
  **   -u  SENDS THE CREDENTIALS. THEY ARE PART OF THE COMMAND LINE AND OF THE
  **       CURL_CMD MACRO VARIABLE SO TREAT BOTH AS SENSITIVE.
  *;
  call symput ('curl_cmd', "&curl_executable -k -u &userpass %nrstr(https://www.mysite.com/index.php)" );
run;
**
** USE THIS VERSION IF YOU NEED TO PASS IN PARAMETERS TO THE URL
**
** NOTE THAT THE CARET (^) IS REQUIRED TO MASK THE AMPERSAND WHEN
** BEING PROCESSED THROUGH THE COMMAND LINE PIPE. THE LOG WILL STILL COMPLAIN ABOUT
** THE MACRO VARIABLE TODT NOT BEING RESOLVED EVEN THOUGH IT IS.
*;
/*%let from=2013-01-01;*/
/*%let to =2013-01-01;*/
/*data _null_;*/
/* call symput ('curl_cmd', "&curl_executable -k -u &userpass %nrstr(https://www.mysite.com/index.php?FromDT=)&from^%nrstr(&)ToDT=&to" ); */
/*run;*/
**
** CONNECT TO SITE AND RETURN HTML RESULT TO A DATASET
**
** THE COMMAND HELD IN CURL_CMD IS RUN THROUGH A PIPE AND EVERY LINE IT WRITES
** BECOMES ONE OBSERVATION OF THE DATASET TMP IN THE VARIABLE XML.
*;
filename curl pipe "&curl_cmd" lrecl=32767;

data tmp;
  /* req_label is only used to build the log message, so it is not kept in TMP */
  length xml $&maxchars req_label $200;
  drop req_label;

  infile curl truncover;
  input @1 xml $&maxchars..;

  /*
  ** SAFETY CHECK 1 - TEST FOR TRUNCATION. A LINE THAT FILLS ALL &maxchars CHARACTERS
  ** OF XML MAY HAVE BEEN CUT OFF BY THE INFORMAT. RAISE MAXCHARS (UP TO THE LRECL OF
  ** 32767) OR PIPE THROUGH SED AND INSERT LINE BREAKS IF NECESSARY.
  */
  if lengthn(xml) ge &maxchars then do;
    /*
    ** FROM is only defined by the parameterised version of the command.
    ** Look it up at run time so the straight-URL version does not trigger an
    ** unresolved macro reference in the log.
    */
    if symexist('from') then req_label = symget('from');
    else req_label = '(UNSPECIFIED)';

    put "ERROR: FAILED FOR " req_label "BECAUSE XML WAS > &maxchars CHARS";
    put "OBSERVATION: " _n_;
    put xml;
    /* stop before the implicit output so the truncated line is not written to TMP */
    stop;
  end;
run;

filename curl clear;
一旦这段代码运行成功并把 HTML 保存到数据集中,接下来就需要解析结果,不过那完全是另一个问题了。