我想提取 PDF 中的图像。拿到图像的原始字节后,图像无法打开,并报错“找不到适合完成此操作的成像组件”。我把这些字节写入文件,并用 Windows 图像查看器和 ImageMagick 进行测试,图像依然显示为损坏。但任何 PDF 查看器都能正常显示它,我甚至可以用 pdfium 得到它渲染后的位图。
/// <summary>
/// Returns the raw (still filter-encoded) bytes of this image object.
/// </summary>
/// <remarks>
/// The buffer size passed down is derived from the image metadata and is the
/// size of the image's unencoded sample data. It is only a sizing hint: the
/// raw data is the stream as stored in the PDF, so for an encoded image its
/// length is unrelated to this value.
/// NOTE(review): the returned bytes are presumably not a standalone image
/// file unless the stream happens to be stored in a self-contained format;
/// confirm before writing them to disk as an image.
/// </remarks>
/// <returns>The raw image bytes as returned by the native wrapper.</returns>
/// <exception cref="System.OverflowException">
/// The computed size does not fit in a 32-bit signed integer.
/// </exception>
public byte[] GetImageRaw()
{
    var meta = FPdfLib.Instance.PageImageObject_GetImageMetaData(handle, pageHandle);

    // Do the arithmetic in 64 bits: width * height * bits_per_pixel easily
    // exceeds 32 bits for large images and would otherwise wrap silently.
    long bitsPerRow = (long)meta.width * (long)meta.bits_per_pixel;

    // Round each row up to a whole byte. Truncating the total bit count
    // (the previous "/ 8") under-sizes the buffer whenever a row is not
    // byte-aligned, e.g. 1-bit images whose width is not a multiple of 8.
    long bytesPerRow = (bitsPerRow + 7) / 8;
    long size = bytesPerRow * (long)meta.height;

    // checked: fail loudly instead of passing a wrapped or negative size.
    return FPdfLib.Instance.PageImageObject_GetImageDataRaw(handle, checked((int)size)); // implemented down
}
/// <summary>
/// Reads the raw image data of an image page object, exactly as stored in
/// the PDF (no filters applied).
/// </summary>
/// <param name="pageImageObject">Handle to the image page object.</param>
/// <param name="bufferSize">
/// Retained for backward compatibility only. The required length is queried
/// from the native library, because the stored stream length cannot be
/// derived reliably from the image dimensions.
/// </param>
/// <returns>
/// A new array holding exactly the raw image bytes; an empty array when the
/// native call reports a length of zero.
/// </returns>
/// <exception cref="OverflowException">
/// The reported length does not fit in a managed array index.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The length reported by the second native call differs from the probed length.
/// </exception>
public byte[] PageImageObject_GetImageDataRaw(IntPtr pageImageObject, int bufferSize)
{
    lock (lockObj)
    {
        // First call: no buffer, zero length. The native function returns the
        // full length of the raw data without writing anything.
        var required = FPdf.FPDFImageObj_GetImageDataRaw(pageImageObject, null, 0);
        if (required == 0)
        {
            return Array.Empty<byte>();
        }

        // Second call: a buffer of exactly the reported length.
        var buffer = new byte[checked((int)required)];
        var written = FPdf.FPDFImageObj_GetImageDataRaw(pageImageObject, buffer, (ulong)buffer.Length);

        // A mismatch means the buffer may not have been filled; surface it
        // rather than returning zeroed or partial data.
        if (written != required)
        {
            throw new InvalidOperationException(
                $"Raw image data length changed between calls (expected {required}, got {written}).");
        }

        return buffer;
    }
}
/// <summary>
/// Get the raw image data of |image_object|. The raw data is the image data as
/// stored in the PDF without applying any filters. |buffer| is only modified if
/// |buflen| is longer than the length of the raw image data.
/// </summary>
/// <remarks>
/// The return value is the length of the raw data, so a caller can use it to
/// size |buffer| — presumably by first calling with no buffer and a |buflen|
/// of 0; confirm against the PDFium header.
/// NOTE(review): because no filters are applied, the bytes are presumably
/// still in whatever encoding the PDF stream uses and are not necessarily a
/// standalone image file — verify before handing them to an image decoder.
/// NOTE(review): if the native signature uses C <c>unsigned long</c>, that
/// type is 32 bits wide on Windows; confirm that <c>ulong</c> (64-bit) is the
/// correct managed mapping for pdfium.dll.
/// </remarks>
/// <param name="image_object">handle to an image object.</param>
/// <param name="buffer">buffer for holding the raw image data.</param>
/// <param name="buflen">length of the buffer in bytes.</param>
/// <returns>Returns the length of the raw image data.</returns>
[DllImport("pdfium.dll")]
public static extern ulong FPDFImageObj_GetImageDataRaw(FPDF_PAGEOBJECT image_object, byte[] buffer, ulong buflen);
该 PDF 中的其他图像都可以毫无问题地提取。