1

我想编写一些类:先用 Document Conversion 把 PDF 转换成 HTML 文档,再对这些 HTML 文档使用 Concept Insights。我使用的是带有 Git 目录视图的 Eclipse IDE。运行时我没有得到任何响应。我希望代码保持整洁,同时确保每个方法都能获得它所需的信息。无论如何,以下是到目前为止的代码。

// A Java compilation unit may contain at most one package declaration, and it
// names the package this class lives in. The Watson SDK packages that were
// listed here are dependencies and belong in import statements instead.
package example.servlet;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.ServletException;
import javax.servlet.annotation.WebServlet;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.io.FileUtils;

import com.ibm.watson.developer_cloud.concept_insights.v2.ConceptInsights;
import com.ibm.watson.developer_cloud.concept_insights.v2.model.AccountPermission;
import com.ibm.watson.developer_cloud.concept_insights.v2.model.Accounts;
import com.ibm.watson.developer_cloud.concept_insights.v2.model.Concepts;
import com.ibm.watson.developer_cloud.concept_insights.v2.model.Corpus;
import com.ibm.watson.developer_cloud.concept_insights.v2.model.Document;
import com.ibm.watson.developer_cloud.document_conversion.v1.DocumentConversion;
import com.ibm.watson.developer_cloud.document_conversion.v1.model.Answers;
import com.ibm.watson.developer_cloud.util.CredentialUtils;



/**
 * Servlet implementation class SimpleServlet
 */
@WebServlet("/SimpleServlet")
public class SimpleServlet extends HttpServlet {

    ConceptInsights service = new ConceptInsights();
    DocumentConversion docService = new DocumentConversion(DocumentConversion.VERSION_DATE_2015_12_01);

    service.setUsernameAndPassword("username", "password");

    Accounts my_accounts = service.getAccountsInfo();
    System.out.println(my_accounts);
    String my_account_id = my_accounts.getAccounts().get(0).getId();
    System.out.println("My account ID: "+ my_account_id);
    docService.setUsernameAndPassword("docUsername", "docPassword");

    @Override
     public void init() throws ServletException {
        super.init();
        String vcap = System.getProperty("VCAP_SERVICES");
        if (vcap == null){
            try {
                vcap = FileUtils.readFileToString(new File("vcap.txt"));
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

     }

    private static final long serialVersionUID = 1L;

    /**
     * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        response.setContentType("text/html");
        response.getWriter().print("Hello World!");
    }

}

public void transformDocs(File[] availableDocs){
    /* Files.walk(Paths.get("/home/you/Desktop")).forEach(filePath -> {
    if (Files.isRegularFile(filePath)) {
        Document test = new Document(my_corpus, filePath);
test.setLabel("Test: "+filePath);
    }
}); 

File folder = new File("/Users/you/folder/");
File[] listOfFiles = folder.listFiles();

for (File file : listOfFiles) {
    if (file.isFile()) {
        Document test = new Document(my_corpus, filePath);
test.setLabel("Test: "+filePath);
    }
}*/
    }
}

public void fixTheInput(){
    CredentialUtils.setServices(vcap);
    File doc = new File("docs/cpc-scheme-A.pdf");
    Answers pdfToHTML = service.convertDocumentToHTML(doc);
    System.out.println(pdfToHTML);

    Corpus patents1 = new Corpus("evelandments", "PatentsCorpus");
    Corpus patents2 = new Corpus("perdermy1ipres6", "patents");

    Map<String, Object> params = new HashMap<String, Object>();
    params.put(ConceptInsights.LIMIT, 3);

    Concepts concepts1 = service.getCorpusRelatedConcepts(patents1, params);
    Concepts concepts2 = service.getCorpusRelatedConcepts(patents2, params);

    System.out.println(concepts1);
    System.out.println(concepts2);
}

public void createTheCorpus(){
    Corpus my_corpus = new Corpus(service.getAccountsInfo().getAccounts().get(0).getId(), "CPCScheme");

    // add corpus metadata 
    AccountPermission permission = new AccountPermission();
    permission.setAccountId("account_id");
    permission.setPermission("ReadWriteAdmin");
    List<AccountPermission> accountPermissions = new ArrayList<AccountPermission>();
    accountPermissions.add(permission);
    my_corpus.setAccountPermissions(accountPermissions);
    //

    service.createCorpus(my_corpus);
    System.out.println("Created corpus: "+ my_corpus.getName());

    Document test = new Document(my_corpus, "test.html");
    test.setLabel("Test: my first document");


    }
}
4

1 回答 1

2

下面是一个示例类:它先使用 Document Conversion 将 PDF 转换为文本,然后使用 Concept Insights 从文本中提取概念。

package com.ibm.watson.developer_cloud.retrieve_and_rank.v1;

import java.io.File;
import java.io.FileInputStream;

import org.apache.commons.io.IOUtils;

import com.ibm.watson.developer_cloud.concept_insights.v2.ConceptInsights;
import com.ibm.watson.developer_cloud.concept_insights.v2.model.Annotations;
import com.ibm.watson.developer_cloud.concept_insights.v2.model.Graph;
import com.ibm.watson.developer_cloud.concept_insights.v2.model.ScoredConcept;
import com.ibm.watson.developer_cloud.document_conversion.v1.DocumentConversion;
import com.ibm.watson.developer_cloud.util.CredentialUtils;


/**
 * Command-line example: converts a PDF to plain text with Document Conversion
 * and prints the concepts that Concept Insights annotates in that text.
 */
public class WatsonExample {
  /**
   * Reads {@code vcap.txt} from the working directory and registers its
   * content with {@link CredentialUtils}. Any failure is reported on standard
   * output and otherwise ignored, so the program can continue without the file.
   */
  public static void loadCredentialsFromFile() {
    // try-with-resources closes the stream, which was previously leaked;
    // the explicit charset avoids depending on the platform default.
    try (FileInputStream vcapStream = new FileInputStream("vcap.txt")) {
      CredentialUtils.setServices(IOUtils.toString(vcapStream, "UTF-8"));
    } catch (Exception e) {
      System.out.println("Error reading vcap.txt file");
      e.printStackTrace();
    }
  }


  /**
   * Runs the example against {@code docs/cpc-scheme-A.pdf}.
   *
   * @param args command-line arguments; not used
   */
  public static void main(String[] args) {
    loadCredentialsFromFile();
    ConceptInsights conceptInsights = new ConceptInsights();
    DocumentConversion documentConversion = new DocumentConversion(DocumentConversion.VERSION_DATE_2015_12_01);

    // extract the text from the pdf
    String text = documentConversion.convertDocumentToText(new File("docs/cpc-scheme-A.pdf"));

    // annotate the text
    Annotations annotations = conceptInsights.annotateText(Graph.WIKIPEDIA, text);

    // print out concepts
    for (ScoredConcept concept : annotations.getAnnotations()) {
      System.out.println(concept.toString());
    }
  }

}

如果 vcap.txt 存在,程序会从该文件加载凭据。

于 2016-03-28T17:02:41.410 回答