我正在尝试用 PDF 文件中的可用图像替换重复的图像,但结果已损坏。
PdfReader.KillIndirect
将重复图像归零,但writer.AddDirectImageSimple
不会将其替换为先前可用图像的引用。这里有什么问题?
这是代码:
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace ReplaceDuplicateImages
{
class Program
{
/// <summary>
/// Adding one image, 2 times.
/// </summary>
private static void createSampleFile()
{
using (var pdfDoc = new Document(PageSize.A4))
{
var pdfWriter = PdfWriter.GetInstance(pdfDoc, new FileStream("Test.pdf", FileMode.Create));
pdfDoc.Open();
var table = new PdfPTable(new float[] { 1, 2 });
table.AddCell(Image.GetInstance("01.png"));
table.AddCell(Image.GetInstance("01.png"));
pdfDoc.Add(table);
}
}
private static void RemoveDuplicateImagesFromPdfFile(string inFile, string outFile)
{
var pdfReader = new PdfReader(inFile);
var pdfStamper = new PdfStamper(pdfReader, new FileStream(outFile, FileMode.Create));
var writer = pdfStamper.Writer;
var md5Service = new MD5CryptoServiceProvider();
var enc = new UTF8Encoding();
var imagesDictionary = new Dictionary<string, PRIndirectReference>();
int pageNum = pdfReader.NumberOfPages;
for (int i = 1; i <= pageNum; i++)
{
var page = pdfReader.GetPageN(i);
var resources = PdfReader.GetPdfObject(page.Get(PdfName.RESOURCES)) as PdfDictionary;
if (resources == null) continue;
var xObject = PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT)) as PdfDictionary;
if (xObject == null) continue;
foreach (var name in xObject.Keys)
{
var pdfObject = xObject.Get(name);
if (!pdfObject.IsIndirect()) continue;
var imgObject = PdfReader.GetPdfObject(pdfObject) as PdfDictionary;
if (imgObject == null) continue;
var subType = PdfReader.GetPdfObject(imgObject.Get(PdfName.SUBTYPE)) as PdfName;
if (subType == null) continue;
if (!PdfName.IMAGE.Equals(subType)) continue;
var imageBytes = PdfReader.GetStreamBytesRaw((PRStream)imgObject);
var md5 = enc.GetString(md5Service.ComputeHash(imageBytes));
if (!imagesDictionary.ContainsKey(md5)) // is it duplicate?
{
imagesDictionary.Add(md5, (PRIndirectReference)pdfObject);
}
else
{
PdfReader.KillIndirect(pdfObject); // nulls the duplicate image
// trying to replace it with the reference of the available image
var imageRef = imagesDictionary[md5];
var image = Image.GetInstance(imageRef);
Image maskImage = image.ImageMask; // it's always null here.
if (maskImage != null)
writer.AddDirectImageSimple(maskImage);
writer.AddDirectImageSimple(image, (PRIndirectReference)pdfObject);
}
}
}
pdfReader.RemoveUnusedObjects();
pdfReader.Close();
pdfStamper.Close();
}
static void Main(string[] args)
{
createSampleFile();
RemoveDuplicateImagesFromPdfFile("test.pdf", "Optimized.pdf");
Process.Start("Optimized.pdf");
}
}
}
我知道PdfCopy
和PdfSmartCopy
。我不想使用它们。