我知道 PDFKit 允许将文本及其格式提取为 NSAttributedString,但我找不到任何关于如何使用 Swift 从任意 PDF 文档中逐个提取图片的资料。
任何帮助将不胜感激,谢谢!
编辑:https://stackoverflow.com/a/40788449/2303865 解释了如何将整个页面转换为图像,但我需要解析的是一系列 PDF 文档中已经嵌入的所有图像,并且事先不知道它们的位置,因此该解决方案不适用于我的问题。
我知道 PDFKit 允许将文本及其格式提取为 NSAttributedString,但我找不到任何关于如何使用 Swift 从任意 PDF 文档中逐个提取图片的资料。
任何帮助将不胜感激,谢谢!
编辑:https://stackoverflow.com/a/40788449/2303865 解释了如何将整个页面转换为图像,但我需要解析的是一系列 PDF 文档中已经嵌入的所有图像,并且事先不知道它们的位置,因此该解决方案不适用于我的问题。
下面是一个 Swift 函数,用于从 PDF 页面中提取图像,更确切地说,是提取所有 Subtype 为 “Image” 的对象:
import PDFKit
/// Scans every page of a PDF document, in page order, and reports each
/// embedded image to `extractor`.
///
/// - Parameters:
///   - pdf: The document whose pages are scanned.
///   - extractor: Called once for every image found on any page.
/// - Throws: `PDFReadError.couldNotOpenPageNumber` when a page index in
///   `0..<pdf.pageCount` yields no page, plus anything thrown by the
///   page-level `extractImages(from:extractor:)`.
func extractImages(from pdf: PDFDocument, extractor: @escaping (ImageInfo)->Void) throws {
    try (0..<pdf.pageCount).forEach { index in
        if let currentPage = pdf.page(at: index) {
            try extractImages(from: currentPage, extractor: extractor)
        } else {
            throw PDFReadError.couldNotOpenPageNumber(index)
        }
    }
}
/// Extracts every image XObject listed in the resources of a single PDF page.
///
/// The page's `Resources` dictionary is looked up, and each entry of its
/// `XObject` dictionary whose stream dictionary has `Subtype` equal to
/// `Image` is wrapped in an `ImageInfo` and handed to `extractor`.
/// A page without an `XObject` dictionary is not an error; nothing is
/// reported for it.
///
/// NOTE(review): only the page's own dictionary is consulted for `Resources`;
/// a `Resources` entry inherited from a parent node is not searched.
///
/// - Parameters:
///   - page: The page to scan.
///   - extractor: Called once per image found. Images reported before a
///     failure stay reported even when this function subsequently throws.
/// - Throws: `PDFReadError.couldNotOpenPage` when the page has no `pageRef`,
///   `PDFReadError.couldNotOpenDictionaryOfPage` when it has no dictionary,
///   `PDFReadError.couldNotReadResources` when `Resources` cannot be read, and
///   `PDFReadError.cannotReadXObjectStream` when the data of an image stream
///   cannot be copied.
func extractImages(from page: PDFPage, extractor: @escaping (ImageInfo)->Void) throws {
    let pageNumber = page.label ?? "unknown page"
    guard let pageRef = page.pageRef else {
        throw PDFReadError.couldNotOpenPage(pageNumber)
    }
    guard let dictionary = pageRef.dictionary else {
        throw PDFReadError.couldNotOpenDictionaryOfPage(pageNumber)
    }
    guard let resources = dictionary[CGPDFDictionaryGetDictionary, "Resources"] else {
        throw PDFReadError.couldNotReadResources(pageNumber)
    }
    // No XObject dictionary means the page references no images at all.
    guard let xObject = resources[CGPDFDictionaryGetDictionary, "XObject"] else {
        return
    }
    print("reading resources of page", pageNumber)

    // Name of the first image whose stream data could not be copied. The
    // applier cannot throw, so the failure is recorded here and raised once
    // the enumeration has returned.
    var unreadableXObject: String?

    // Applier invoked for each XObject entry: returning true continues with
    // the next entry, returning false ends the enumeration.
    func extractImage(key: UnsafePointer<Int8>, object: CGPDFObjectRef, info: UnsafeMutableRawPointer?) -> Bool {
        // Entries that are not image streams are skipped, not treated as errors.
        guard let stream: CGPDFStreamRef = object[CGPDFObjectGetValue, .stream] else { return true }
        guard let dictionary = CGPDFStreamGetDictionary(stream) else { return true }
        guard dictionary.getName("Subtype", CGPDFDictionaryGetName) == "Image" else { return true }

        let colorSpaces = dictionary.getNameArray(for: "ColorSpace") ?? []
        let filter = dictionary.getNameArray(for: "Filter") ?? []
        var format = CGPDFDataFormat.raw
        guard let data = CGPDFStreamCopyData(stream, &format) as Data? else {
            // Previously this stopped the enumeration silently, dropping the
            // remaining images without telling the caller.
            unreadableXObject = String(cString: key)
            return false
        }
        extractor(
            ImageInfo(
                name: String(cString: key),
                colorSpaces: colorSpaces,
                filter: filter,
                format: format,
                data: data
            )
        )
        return true
    }

    CGPDFDictionaryApplyBlock(xObject, extractImage, nil)

    if let failedName = unreadableXObject {
        throw PDFReadError.cannotReadXObjectStream(xObject: failedName, page: pageNumber)
    }
}
/// One image stream taken from a PDF, together with the metadata that
/// accompanies it.
struct ImageInfo: CustomDebugStringConvertible {
    /// Name identifying the image.
    let name: String
    /// Names of the color spaces associated with the image.
    let colorSpaces: [String]
    /// Names of the filters associated with the image stream.
    let filter: [String]
    /// Data format of `data`.
    let format: CGPDFDataFormat
    /// The image stream's bytes.
    let data: Data

    /// Multi-line, human-readable summary: name, color spaces, format,
    /// filters and the byte size of `data`.
    var debugDescription: String {
        let formatLabel: String
        switch format {
        case .JPEG2000:
            formatLabel = "JPEG2000"
        case .jpegEncoded:
            formatLabel = "jpeg"
        default:
            // Every other format is reported as raw.
            formatLabel = "raw"
        }
        let byteSize = ByteCountFormatter.string(fromByteCount: Int64(data.count), countStyle: .binary)
        return """
        Image "\(name)"
        - color spaces: \(colorSpaces)
        - format: \(formatLabel)
        - filters: \(filter)
        - size: \(byteSize)
        """
    }
}
/// Convenience wrappers around C-style getter functions that take the
/// receiver, a key and an out-pointer, and return `true` on success.
/// `self` is always passed as the getter's first (`OpaquePointer`) argument.
extension CGPDFObjectRef {
/// Reads a C string through `getter` and converts it to a `String`.
///
/// - Parameters:
///   - key: Key (or index) forwarded unchanged to `getter`.
///   - getter: Function that stores a C string pointer in its typed
///     out-parameter and returns whether it succeeded.
/// - Returns: The string, or `nil` when `getter` returns `false` or leaves
///   the pointer `nil`.
func getName<K>(_ key: K, _ getter: (OpaquePointer, K, UnsafeMutablePointer<UnsafePointer<Int8>?>)->Bool) -> String? {
guard let pointer = self[getter, key] else { return nil }
return String(cString: pointer)
}
/// Same as the overload above, for getters whose out-parameter is an untyped
/// raw pointer. The value written by `getter` is read back as an
/// `UnsafePointer<UInt8>` C string.
///
/// - Returns: The string, or `nil` when `getter` returns `false` or leaves
///   the pointer `nil`.
func getName<K>(_ key: K, _ getter: (OpaquePointer, K, UnsafeMutableRawPointer?)->Bool) -> String? {
guard let pointer: UnsafePointer<UInt8> = self[getter, key] else { return nil }
return String(cString: pointer)
}
/// Calls `getter(self, key, &result)` with a typed out-parameter and returns
/// the value it stored.
///
/// - Returns: `nil` when `getter` returns `false`; otherwise whatever the
///   getter wrote into the out-parameter.
subscript<R, K>(_ getter: (OpaquePointer, K, UnsafeMutablePointer<R?>)->Bool, _ key: K) -> R? {
// Declared implicitly unwrapped so its address can be handed to the getter.
var result: R!
guard getter(self, key, &result) else { return nil }
return result
}
/// Variant for getters whose out-parameter is an untyped raw pointer.
/// `R` cannot be deduced from `getter` here, so the call site has to fix it
/// (for example with a type annotation on the binding).
///
/// - Returns: `nil` when `getter` returns `false`; otherwise the value the
///   getter wrote, read as `R`.
subscript<R, K>(_ getter: (OpaquePointer, K, UnsafeMutableRawPointer?)->Bool, _ key: K) -> R? {
var result: R!
guard getter(self, key, &result) else { return nil }
return result
}
/// Reads the entry stored under `key` as a list of names.
///
/// - Parameter key: Dictionary key to look up via `CGPDFDictionaryGetObject`.
/// - Returns: A one-element array when the entry can be read as a name; the
///   entries readable as names when the entry is an array; `nil` when the key
///   is missing or the entry is neither a name nor an array.
func getNameArray(for key: String) -> [String]? {
var object: CGPDFObjectRef!
guard CGPDFDictionaryGetObject(self, key, &object) else { return nil }
// First try the entry as a single name.
if let name = object.getName(.name, CGPDFObjectGetValue) {
return [name]
} else {
// Otherwise it has to be an array; anything else yields nil.
guard let array: CGPDFArrayRef = object[CGPDFObjectGetValue, .array] else {return nil}
var names = [String]()
for index in 0..<CGPDFArrayGetCount(array) {
// Array entries that cannot be read as a name are skipped.
guard let name = array.getName(index, CGPDFArrayGetName) else { continue }
names.append(name)
}
return names
}
}
}
/// Failures that can occur while reading images out of a PDF document.
enum PDFReadError: Error {
    /// No page could be obtained for the given zero-based page index.
    case couldNotOpenPageNumber(Int)

    /// The page (identified by its label) has no underlying page reference.
    case couldNotOpenPage(String)

    /// The page (identified by its label) has no page dictionary.
    case couldNotOpenDictionaryOfPage(String)

    /// The `Resources` dictionary of the page (identified by its label)
    /// could not be read.
    case couldNotReadResources(String)

    /// The stream of an XObject could not be read; carries the XObject's
    /// name and the label of the page it belongs to.
    case cannotReadXObjectStream(xObject: String, page: String)
}
您应该知道,PDF 中的图像可以用不同的方式表示:它们既可以作为独立的 JPG 嵌入,也可以作为原始像素数据(经过无损压缩或未压缩)嵌入,并附带有关压缩方式、色彩空间、宽度、高度等的元信息。
因此,如果您只想导出嵌入的 JPG,这段代码可以正常工作。但如果您还想显示原始像素图像,则需要更多的解析代码。入门时,您可以查阅 PDF 2.0 规范(或该规范较旧的免费版本),以及这份 gist;它可以解析任意颜色配置文件的 JPG,以及采用以下任一颜色配置文件的原始图像: