1

这是输入的 JSON 文件。需要将它解析到 SAS 数据集中。

"results":
[
 {
    "acct_nbr": 1234,
    "firstName": "John",
    "lastName": "Smith",
    "age": 25,
    "address": {
        "streetAddress": "21 2nd Street",
        "city": "New York",
        "state": "NY",
        "postalCode": "10021"
        }
 }
,
{
    "acct_nbr": 3456,
    "firstName": "Sam",
    "lastName": "Jones",
    "age": 32,
    "address": {
        "streetAddress": "25 2nd Street",
        "city": "New Jersy",
        "state": "NJ",
        "postalCode": "10081"
        }
 }
]

我希望 SAS 数据集中只有 Address 字段的输出,如下所示:

ACCT_NBR    FIELD_NAME  FIELD_VALUE
1234    streetAddress   21 2nd Street
1234    city    New York
1234    state   NY
1234    postalCode  10021
3456    streetAddress   25 2nd Street
3456    city    New Jersy
3456    state   NJ
3456    postalCode  10081

我尝试过其他几种方法,但都没有得到类似的输出。甚至尝试从PDF扫描...但仍无法获得所需的输出...

这是我的代码......和输出......

/* Library in which the parsed data set is stored. */
LIBNAME src  '/home/user/read_JSON';

/* Fileref for the JSON input file. */
filename data '/home/user/read_JSON/test2.json';
/* Read the four address fields of every account by searching for their JSON keys. */
data src.testdata2;
    infile data lrecl = 32000 truncover scanover;
        /* Each @ character-string pointer positions the read just after the quoted key text,
           and the informat that follows reads the raw value from there. */
        input @'"streetAddress": "' streetAddress $255. @'"city": "' city $255. @'"state": "' state $2. @'"postalCode": "' postalCode $255.;
        /* Cut each value at the closing quote-comma pair.
           NOTE(review): index(...)-2 appears to drop one character too many (the listing shows
           21 2nd Stree and New Yor), and the postalCode line has no trailing comma, so its
           closing quote is still present in the listing. */
        streetAddress = substr(streetAddress,1,index(streetAddress,'",')-2);
        city = substr( city,1,index( city,'",')-2);
        state = substr(state,1,index(state,'",')-2);
        postalCode = substr(postalCode,1,index(postalCode,'",')-2);
run;

/* List the resulting data set. */
proc print data=src.testdata2;
RUN;

.lst文件中的输出

The SAS System   09:44 Tuesday, January 14, 2014   1
           street                            postal
 Obs      Address         city      state     Code

  1     21 2nd Stree    New Yor       NY      10021"
  2     25 2nd Stree    New Jers      NJ      10081"
4

3 回答 3

4

如果用纯 SAS 的解决方案来回答您的问题,您的代码存在两个问题:

  • 使用SCAN而不是substr获取非逗号/引号部分
  • acct_nbr 是一个数字(它的值没有用引号括起来),因此在 INPUT 语句中,它的搜索字符串末尾不能带引号。

这是正确的代码(我更改了目录,您需要将它们改回来):

/* Fileref for the JSON text file. */
filename data 'c:\temp\json.txt';

/* Build one observation per account: locate each JSON key, read the raw text
   that follows it, then keep only the part before the first comma or quote. */
data testdata2;
    infile data lrecl=32000 truncover scanover;

    /* acct_nbr is numeric in the JSON, so its search string has no opening quote. */
    input @'"acct_nbr": '       acct_nbr      $255.
          @'"streetAddress": "' streetAddress $255.
          @'"city": "'          city          $255.
          @'"state": "'         state         $2.
          @'"postalCode": "'    postalCode    $255.;

    /* Apply the same clean-up to every field that was read. */
    array jsonField{*} acct_nbr streetAddress city state postalCode;
    do fieldIdx = 1 to dim(jsonField);
        jsonField{fieldIdx} = scan(jsonField{fieldIdx}, 1, ',"');
    end;
    drop fieldIdx;
run;

/* List the parsed accounts. */
proc print data=testdata2;
run;
于 2014-01-14T16:14:25.950 回答
4

您可以使用 proc groovy 非常轻松地解析 JSON(假设您了解 Groovy)。 这篇关于向 Twitter 进行身份验证的 SAS 博客给出了如何执行此操作的详细示例;下面是其中的一些要点。

这假设您拥有 Groovy JAR 文件 ( http://groovy.codehaus.org/Download ) 和输出文件的方法(示例使用OpenCSV)。

以下是我的尝试;我认为它不太有效,但我也不了解 Groovy。一般概念应该是正确的。如果您想尝试这种方法,但无法弄清楚具体细节,您可以给您的问题重新加上相应的标签(例如 groovy),或者使用该标签提出一个新问题。

%let groovydir=C:\Program Files\SASHome_9.4\SASFoundation\9.4\groovy; *the location the groovy JARs are located at;

%let sourcefile=c:\temp\json.txt;
%let outfile=c:\temp\json.csv;

/* Parse the JSON file with Groovy's JsonSlurper and write one CSV row per
   element of the top-level results array (header row first).
   The input must be a complete JSON document, i.e. the results member has to
   be enclosed in an outer pair of braces. */
proc groovy classpath="&groovydir.\groovy-all-2.2.0.jar;&groovydir.\opencsv-2.3.jar"; 

   submit "&sourcefile" "&outfile"; 
      import groovy.json.*
      import au.com.bytecode.opencsv.CSVWriter

      // args[0] is the JSON input path and args[1] is the CSV output path
      def input = new File(args[0]).text
      def output = new JsonSlurper().parseText(input)
      def csvoutput = new FileWriter(args[1])

      CSVWriter writer = new CSVWriter(csvoutput);

      try {
         String[] header = new String[8];
         header[0] = "results.acct_nbr";
         header[1] = "results.firstName";
         header[2] = "results.lastName";
         header[3] = "results.age";
         header[4] = "results.address.streetAddress";
         header[5] = "results.address.city";
         header[6] = "results.address.state";
         header[7] = "results.address.postalCode";
         writer.writeNext(header);

         // The accounts are stored under the results key (there is no statuses key
         // in this file), and each element is the account object itself, so its
         // fields are read directly from the closure parameter.
         output.results.each {
            String[] content = new String[8];
            content[0] = it.acct_nbr.toString();
            content[1] = it.firstName.toString();
            content[2] = it.lastName.toString();
            content[3] = it.age.toString();
            content[4] = it.address.streetAddress.toString();
            content[5] = it.address.city.toString();
            content[6] = it.address.state.toString();
            content[7] = it.address.postalCode.toString();
            writer.writeNext(content)
         }
      } finally {
         // Always flush and release the output file, even if a row fails
         writer.close();
      }

    endsubmit; 
 quit;
于 2014-01-14T15:19:17.890 回答
0

我在 sas.com 的一个帖子中使用了这个 json 文件和上面的代码作为示例。那里的一位专家程序员非常慷慨地给出了一个解决方案。请注意,json 文件的内容需要用“{}”括起来。

链接:https://communities.sas.com/thread/72163

代码:

/* Define two Groovy classes that the DATA step further down drives through a javaobj:
   MyJsonParser reads the JSON file, MyJsonParser4Sas exposes the parsed accounts one at a time.
   NOTE(review): the temporary fileref cp is presumably where PROC GROOVY keeps the compiled
   classes, since its path is later placed on the Java classpath - confirm against the PROC GROOVY docs. */
filename cp temp;
proc groovy classpath=cp;


/* Make the Groovy runtime (including groovy.json) available to the submitted code. */
add classpath="C:\Program Files\Java\groovy-2.3.4\embeddable\groovy-all-2.3.4.jar";
/*or*/
/*
add classpath="C:\Program Files\Java\groovy-2.3.4\lib\groovy-2.3.4.jar";
add classpath="C:\Program Files\Java\groovy-2.3.4\lib\groovy-json-2.3.4.jar";
*/

/* NOTE(review): parseonly is expected to compile the class without running it - confirm in the docs. */
submit parseonly;
import groovy.json.JsonSlurper
// Reads a JSON file and flattens every element of its results array into a map of strings.
class MyJsonParser {
    // path: location of the JSON file.
    // Returns a list with one map per account. The nested address fields are lifted to the
    // top level, and acct_nbr and age (numbers in the sample JSON) are converted with toString().
    def parseFile(path) {
     def jsonFile = new File(path)
  def jsonText = jsonFile.getText()
        def InputJSON = new JsonSlurper().parseText(jsonText)
        def accounts = []


        // One flat map per entry of the results array
        InputJSON.results.each{
            accounts << [
                    acct_nbr      : it.acct_nbr.toString(),
                    firstName     : it.firstName,
                    lastName      : it.lastName,
                    age           : it.age.toString(),
                    streetAddress : it.address.streetAddress,
                    city          : it.address.city,
                    state         : it.address.state,
                    postalCode    : it.address.postalCode
            ]
        }


        return accounts
    }
}
endsubmit;


submit parseonly;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;


// Iterator-style wrapper around MyJsonParser. The DATA step below sets filename, calls init once,
// then alternates hasNext / getNext and fetches the fields of the current account with getString.
public class MyJsonParser4Sas {
    // Path of the JSON file, assigned from SAS before init is called
    public String filename = "";


    // Parses the file named by filename and positions the iterator before the first account
    public void init() {
        MyJsonParser myParser = new MyJsonParser();
        accounts = myParser.parseFile(filename);
        iter = accounts.iterator();
    }


    // True while there is another account to fetch
    public boolean hasNext() {
        return iter.hasNext();
    }


    // Advances to the next account and makes it the current one
    public void getNext() {
        account = ((LinkedHashMap) (iter.next()));
    }


    // Returns the value stored under key k in the current account
    public String getString(String k) {
        return account.get(k);
    }


    // Parsed accounts, the iterator over them, and the account selected by the last getNext
    protected ArrayList accounts;
    protected Iterator iter;
    protected LinkedHashMap account;
}
endsubmit;


quit;


/* Put the directory behind fileref cp on the Java classpath used by javaobj. */
options set=classpath "%sysfunc(pathname(cp,f))";

/* Create one observation per account by stepping through the parser object
   with hasNext / getNext and copying each field into a SAS variable. */
data accounts;
   length id              8
          acct_nbr      $ 10
          firstName     $ 20
          lastName      $ 30
          age           $ 3
          streetAddress $ 128
          city          $ 40
          state         $ 2
          postalCode    $ 5;
   label  id            = "Account Index"
          acct_nbr      = "Account Number"
          firstName     = "First Name"
          lastName      = "Last Name"
          age           = "Age"
          streetAddress = "Street Address"
          city          = "City"
          state         = "State"
          postalCode    = "Postal Code";

   /* Instantiate the wrapper class and have Java exceptions reported in the log. */
   declare javaobj accounts("MyJsonParser4Sas");
   accounts.exceptiondescribe(1);

   /* Tell the parser which file to read, then parse it. */
   accounts.setStringField("filename", "C:\\foo.json");
   accounts.callVoidMethod("init");

   /* rc is refreshed at the end of every pass and controls the loop. */
   accounts.callBooleanMethod("hasNext", rc);
   id = 0;
   do while (rc);
      id = id + 1;
      accounts.callVoidMethod("getNext");

      accounts.callStringMethod("getString", "acct_nbr", acct_nbr);
      accounts.callStringMethod("getString", "firstName", firstName);
      accounts.callStringMethod("getString", "lastName", lastName);
      accounts.callStringMethod("getString", "age", age);
      accounts.callStringMethod("getString", "streetAddress", streetAddress);
      accounts.callStringMethod("getString", "city", city);
      accounts.callStringMethod("getString", "state", state);
      accounts.callStringMethod("getString", "postalCode", postalCode);

      output;
      accounts.callBooleanMethod("hasNext", rc);
   end;

   drop rc;
run;
于 2015-03-10T07:03:05.127 回答