我正在使用 iTextSharp 从 PDF 文件中提取图像。图像可以提取出来,但提取出的图像显示不正确(看起来像照片底片,即颜色被反转)。
代码:
// Extracts every image XObject referenced by one page of a PDF and re-saves each
// one as a sequentially numbered JPEG ("1.jpg", "2.jpg", ...) inside imgPath.
// Images that cannot be decoded or saved are reported and skipped, not silently dropped.
string sFilePath = "Test3.pdf";
int pageNum = 1;
int imageCount = 0;
PdfReader pdf = new PdfReader(sFilePath);
try
{
    // Any of these lookups can come back null (missing page, no /Resources, no /XObject),
    // so resolve them with null-tolerant casts instead of dereferencing blindly.
    PdfDictionary pg = pdf.GetPageN(pageNum);
    PdfDictionary res = pg == null ? null : PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)) as PdfDictionary;
    PdfDictionary xobj = res == null ? null : PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)) as PdfDictionary;
    if (xobj == null) { return; }

    // The encoder lookup does not depend on the image, so do it once instead of per iteration.
    ImageCodecInfo jpegEncoder = Array.Find(ImageCodecInfo.GetImageEncoders(), x => x.FormatID == ImageFormat.Jpeg.Guid);
    if (jpegEncoder == null)
    {
        System.Diagnostics.Trace.TraceWarning("No JPEG encoder is installed; no images were extracted from {0}.", sFilePath);
        return;
    }

    using (System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1))
    {
        // The JPEG codec is driven by Encoder.Quality (a long in 0..100); Encoder.Compression
        // is a TIFF parameter. 100L requests the least lossy re-encode.
        parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Quality, 100L);

        foreach (PdfName name in xobj.Keys)
        {
            PdfObject obj = xobj.Get(name);
            if (obj == null || !obj.IsIndirect()) { continue; }

            // Resolving the reference already yields the stream object, so there is no need
            // to look it up a second time through its xref number.
            PRStream imgStream = PdfReader.GetPdfObject(obj) as PRStream;
            if (imgStream == null) { continue; }

            // Comparing from the constant side keeps a missing /Subtype from throwing.
            PdfName type = PdfReader.GetPdfObject(imgStream.Get(PdfName.SUBTYPE)) as PdfName;
            if (!PdfName.IMAGE.Equals(type)) { continue; }

            try
            {
                // NOTE(review): GetStreamBytesRaw returns the stream bytes without applying the
                // PDF filters, so Image.FromStream can only succeed when the stored data is
                // itself a format GDI+ understands (presumably DCTDecode/JPEG). The inverted
                // ("negative") output is presumably caused by CMYK JPEG data or a /Decode array
                // on the image dictionary that GDI+ does not apply - TODO confirm against the PDF.
                byte[] bytes = PdfReader.GetStreamBytesRaw(imgStream);
                if (bytes == null) { continue; }

                // The stream must stay open for the lifetime of the Image, hence the nesting.
                using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
                using (System.Drawing.Image img = System.Drawing.Image.FromStream(memStream))
                {
                    // CreateDirectory is a no-op when the directory already exists.
                    Directory.CreateDirectory(imgPath);
                    string path = Path.Combine(imgPath, String.Format(@"{0}.jpg", imageCount + 1));
                    img.Save(path, jpegEncoder, parms);

                    // Only consume a file number once the image is actually on disk.
                    imageCount++;
                }
            }
            catch (Exception ex)
            {
                // Best effort: one undecodable image must not abort the rest, but the failure
                // has to be visible to whoever is diagnosing the extraction.
                System.Diagnostics.Trace.TraceWarning("Skipping image XObject {0}: {1}", name, ex.Message);
            }
        }
    }
}
finally
{
    // Release the reader's file handle on every exit path, including the early returns.
    pdf.Close();
}