0

我正在尝试使用 itext 框架将 pdf 文件转换为 csv 以导入到 excel 中。

输出是乱码。我认为我遗漏了格式转换的某个步骤,但在 iText 官网上找不到相关信息,因此前来求助。

当前代码如下。

package com.pdf.convert;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;

/**
 * Extracts the text of a PDF file and writes it to a plain-text file that can
 * be imported into a spreadsheet.
 *
 * <p>The previous version pointed a {@code PdfWriter} at the {@code .csv}
 * path, which produces a binary PDF (with the imported page embedded as an
 * image) rather than text; that is why the output looked garbled. This version
 * uses {@code PdfTextExtractor} to pull the text out of every page instead.
 *
 * <p>Each extracted text line becomes one line of the output file. No attempt
 * is made to split lines into columns, because the PDF text layer does not
 * carry table structure; add delimiter handling here if the input needs it.
 */
public class ThirdPDF {

    private static final String INPUTFILE = "/location/test.pdf";
    private static final String OUTPUTFILE = "/location/test.csv";

    /**
     * Reads {@code INPUTFILE} and writes the text of all its pages to
     * {@code OUTPUTFILE}, encoded as UTF-8.
     *
     * @param args unused
     * @throws DocumentException kept in the signature for compatibility with
     *         the previous version
     * @throws IOException if the PDF cannot be read or the output cannot be
     *         written
     */
    public static void main(String[] args) throws DocumentException,
            IOException {
        PdfReader reader = new PdfReader(INPUTFILE);
        try (Writer out = new OutputStreamWriter(
                new FileOutputStream(OUTPUTFILE), StandardCharsets.UTF_8)) {
            int n = reader.getNumberOfPages();
            // iText page numbers are 1-based.
            for (int i = 1; i <= n; i++) {
                out.write(PdfTextExtractor.getTextFromPage(reader, i).trim());
                out.write("\n");
            }
        } finally {
            // PdfReader holds the input file open until it is closed.
            reader.close();
        }
    }
}
4

1 回答 1

0

以下代码将 PDF 文件转换为 CSV 文件。其中目录和文件的创建基于 Android 框架,请根据您所用的框架相应地修改路径和目录。

/**
 * Extracts the text of every page of a PDF and writes it, UTF-8 encoded, to
 * {@code Mycsv/MyCSVFile.csv} under the external storage directory.
 *
 * <p>Does nothing if the output folder cannot be created. I/O failures are
 * reported with {@code printStackTrace()} and not rethrown.
 *
 * @param pdfFilePath path of the PDF file to read
 */
private void convertPDFToCSV(String pdfFilePath) {
        String myfolder = Environment.getExternalStorageDirectory() + "/Mycsv";
        if (!createFolder(myfolder)) {
            return;
        }
        PdfReader reader1 = null;
        FileOutputStream fos = null;
        try {
            // Open the PDF first so an unreadable input does not leave an
            // empty output file behind.
            reader1 = new PdfReader(pdfFilePath);
            int n = reader1.getNumberOfPages();
            StringBuilder parsedText = new StringBuilder();
            // iText page numbers are 1-based.
            for (int i = 1; i <= n; i++) {
                // Append only this page's text; appending the builder to
                // itself would duplicate all earlier pages on every pass.
                parsedText.append(PdfTextExtractor.getTextFromPage(reader1, i).trim())
                        .append("\n");
            }
            fos = new FileOutputStream(myfolder + "/MyCSVFile.csv");
            // Encode explicitly: writing chars one int at a time keeps only
            // the low byte and corrupts any non-Latin-1 text.
            fos.write(parsedText.toString().getBytes("UTF-8"));
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (reader1 != null) {
                reader1.close();
            }
            if (fos != null) {
                try {
                    fos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Ensures that a directory exists at the given path, creating it and any
     * missing parent directories if necessary.
     *
     * @param myfolder path of the directory to ensure
     * @return {@code true} if a directory exists at {@code myfolder} when this
     *         method returns; {@code false} if it could not be created or the
     *         path is occupied by something that is not a directory
     */
    private boolean createFolder(String myfolder) {
        File dir = new File(myfolder);
        // isDirectory() rather than exists(): a regular file at this path must
        // not be reported as a usable folder. The trailing re-check covers the
        // directory being created concurrently, which makes mkdirs() return false.
        return dir.isDirectory() || dir.mkdirs() || dir.isDirectory();
    }
于 2018-11-02T11:07:26.070 回答