我一直在尝试能够提取 PDF 包中包含的 pdf 文档,但没有成功。我在任何地方都找不到任何文档或示例代码,但我知道这并非不可能,因为 Adobe Reader 应用程序和 PDFExpert 应用程序都支持它。他们可能有自己的解析器,我希望它不会变成那样......
任何将我指向正确方向的提示将不胜感激
编辑:很长一段时间后,我重新开始研究这个问题,终于弄明白了。特别感谢 iPDFDev 为我指明了正确的方向!!
下面是如何获取每个内部 CGPDFDocumentRef 的代码:
NSURL *url = [NSURL fileURLWithPath:filePath isDirectory:NO];
CGPDFDocumentRef pdf = CGPDFDocumentCreateWithURL((__bridge CFURLRef)url);
CGPDFDictionaryRef catalog = CGPDFDocumentGetCatalog(pdf);
CGPDFDictionaryRef names = NULL;
if (CGPDFDictionaryGetDictionary(catalog, "Names", &names)) {
CGPDFDictionaryRef embFiles = NULL;
if (CGPDFDictionaryGetDictionary(names, "EmbeddedFiles", &embFiles)) {
// At this point you know this is a Package/Portfolio
CGPDFArrayRef nameArray = NULL;
CGPDFDictionaryGetArray(embFiles, "Names", &nameArray);
// nameArray contains the inner documents
// it brings the name and then a dictionary from where you can extract the pdf
for (int i = 0; i < CGPDFArrayGetCount(nameArray); i+=2) {
CGPDFStringRef name = NULL;
CGPDFDictionaryRef dict = NULL;
if (CGPDFArrayGetString(nameArray, i, &name) &&
CGPDFArrayGetDictionary(nameArray, i+1, &dict)) {
NSString *_name = [self convertPDFString:name];
CGPDFDictionaryRef EF;
if (CGPDFDictionaryGetDictionary(dict, "EF", &EF)) {
CGPDFStreamRef F;
if (CGPDFDictionaryGetStream(EF, "F", &F)) {
CFDataRef data = CGPDFStreamCopyData(F, NULL);
CGDataProviderRef provider = CGDataProviderCreateWithCFData(data);
CGPDFDocumentRef _doc = CGPDFDocumentCreateWithProvider(provider);
if (_doc) {
// save the docRef somewhere (_doc)
// save the pdf name somewhere (_name)
}
CFRelease(data);
CGDataProviderRelease(provider);
}
}
}
}
}
}
- (NSString *)convertPDFString:(CGPDFStringRef)string {
CFStringRef cfString = CGPDFStringCopyTextString(string);
NSString *result = [[NSString alloc] initWithString:(__bridge NSString *)cfString];
CFRelease(cfString);
return result;
}