我研究在 iOS 应用程序中使用 PDF 已经有一段时间了。我已经解决了一些难题,例如扫描 PDF 操作符(operator)以及在 UIWebView 中显示 PDF。但是,我真正需要做的是识别 PDF 文档中的可编辑字段。
理想情况下,我希望能够直接与字段交互,但这听起来非常困难,而且不是明显的第一步。我已经在与可以以这种方式操作 PDF 的 Windows 服务进行交互,并且可以解决识别可编辑字段、在表单视图中从用户那里收集字段数据并将该数据发送回服务器的问题。问题是我看不到如何识别这些字段。我正在与政府发布的 PDF 进行交互,例如 I-9 和 W-4,因此我无法控制 PDF 的创建或字段的命名。这就是为什么我需要动态提取它们。任何帮助和/或参考将不胜感激。
我正在使用 Apple 的 Quartz 2D Programming Guide 中的 [this reference](https://developer.apple.com/library/mac/#documentation/graphicsimaging/conceptual/drawingwithquartz2d/dq_pdf_scan/dq_pdf_scan.html "PDF Document Parsing"),在扫描 PDF 时触发操作符回调,但这并不能帮助我找到可编辑的字段。
我也只是简单地加载一个带有 PDF 数据的 UIWebView 以显示给用户。
// Display the PDF by handing the decoded bytes straight to the web view.
[_webView loadData:decodedData MIMEType:@"application/pdf" textEncodingName:@"utf-8" baseURL:nil];
更新:
我构建了一个 PDF Helper 类(如下所示)来遍历文档目录(catalog)中所有可能的对象类型。最初我没有处理数组中嵌套的字典,所以看不到表单字段。解决这个问题之后,我意识到还必须处理指向父对象的引用,否则循环引用会让递归调用陷入无限循环。下面的代码会输出文档目录中的大量信息。现在我只需要解析这些信息,把我需要的表单字段分离出来。
PDFHelper.h
#import <Foundation/Foundation.h>
// Instance that the C dictionary-traversal callback in PDFHelper.m sends its
// messages to.
// NOTE(review): this is a variable definition inside a header, so every file
// that imports PDFHelper.h emits its own `selfClass` symbol — confirm the
// header is only imported from one translation unit, or move the definition
// into PDFHelper.m and leave an `extern` declaration here.
id selfClass;

/// Walks the object graph of a PDF document and records what it finds.
@interface PDFHelper : NSObject

/// Raw bytes of the PDF to inspect.
@property (nonatomic, strong) NSData *pdfData;

/// Values collected during traversal, keyed by the PDF dictionary key under
/// which each value was found.
@property (nonatomic, strong) NSMutableDictionary *pdfDict;

/// Current nesting depth of the traversal; used to indent debug log lines.
@property (nonatomic, assign) int catalogLevel;

/// Traverses the document catalog, recording its contents in `pdfDict`.
/// @return The document catalog dictionary.
- (CGPDFDictionaryRef)getDocumentCatalog;

/// Traverses the catalog's "AcroForm" dictionary (when present).
/// @return The value recorded in `pdfDict` under the "XFA" key.
- (NSArray *)getFormFields;

/// Converts a PDF array into an NSArray, logging each element.
/// Boolean, numeric, string and nested-array elements are collected into the
/// result; nested dictionaries and streams are traversed instead of added.
/// @param arr  The PDF array to convert.
/// @param dict The dictionary that holds `arr`.
/// @param key  The key under which `arr` is stored in `dict`.
- (NSArray *)copyPDFArray:(CGPDFArrayRef)arr
    referencingDictionary:(CGPDFDictionaryRef)dict
           referencingKey:(const char *)key;

@end
PDFHelper.m
#import "PDFHelper.h"
#import "FileHelpers.h"
#import "Log.h"
@implementation PDFHelper
{
    // Document parsed from pdfData. It is kept alive here because the catalog
    // and everything reached from it are owned by the document; it is released
    // in -dealloc and whenever pdfData is replaced.
    CGPDFDocumentRef _pdfDocument;

    // Dictionaries currently being enumerated (boxed pointers). Used to stop
    // reference cycles other than "Parent"/"P" from recursing forever.
    NSMutableSet *_dictionariesInProgress;
}

@synthesize pdfData = _pdfData;
@synthesize pdfDict = _pdfDict;
@synthesize catalogLevel = _catalogLevel;

// CGPDFDictionaryApplierFunction used for every dictionary traversal. Declared
// up front so the methods below can reference it before its definition.
void getDictionaryObjects(const char *key, CGPDFObjectRef object, void *info);

#pragma mark - Private C helpers

// Converts a PDF dictionary key to an NSString; nil when the key is NULL or
// is not valid UTF-8.
static NSString *PDFHelperKeyString(const char *key)
{
    return key ? [NSString stringWithCString:key encoding:NSUTF8StringEncoding] : nil;
}

// YES for the keys that point back up the object tree and therefore must not
// be followed.
static BOOL PDFHelperIsBackReferenceKey(const char *key)
{
    return key != NULL && (strcmp(key, "Parent") == 0 || strcmp(key, "P") == 0);
}

// Returns the stream's data, or nil when it cannot be read. Ownership of the
// copied CFData is transferred to ARC, so nothing leaks.
static NSData *PDFHelperStreamData(CGPDFStreamRef stream)
{
    CGPDFDataFormat format = CGPDFDataFormatRaw;
    return (__bridge_transfer NSData *)CGPDFStreamCopyData(stream, &format);
}

// Interprets stream data as UTF-8 text for logging; nil when there is no data
// or it is not valid UTF-8.
static NSString *PDFHelperTextFromData(NSData *data)
{
    return data ? [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding] : nil;
}

#pragma mark - Lifecycle

- (instancetype)init
{
    self = [super init];
    if (self)
    {
        // The global `selfClass` is deliberately NOT assigned here: it is a
        // strong reference and would keep this instance alive forever. It is
        // bound only for the duration of a traversal (see -enumerateDictionary:).
        _pdfDict = [[NSMutableDictionary alloc] init];
        _dictionariesInProgress = [[NSMutableSet alloc] init];
        _catalogLevel = 1;
    }
    return self;
}

- (void)dealloc
{
    // CGPDFDocumentRelease accepts NULL.
    CGPDFDocumentRelease(_pdfDocument);
}

// Replacing the data invalidates the cached document. Any CGPDF references
// previously returned by this object become invalid at that point.
- (void)setPdfData:(NSData *)pdfData
{
    if (_pdfData == pdfData)
        return;
    _pdfData = pdfData;
    CGPDFDocumentRelease(_pdfDocument);
    _pdfDocument = NULL;
}

#pragma mark - Private helpers

// Logs `message` prefixed with one "-" per traversal level.
- (void)logIndented:(NSString *)message
{
    NSUInteger depth = (NSUInteger)MAX(self.catalogLevel, 0);
    NSString *prefix = [@"" stringByPaddingToLength:depth withString:@"-" startingAtIndex:0];
    [Log LogDebug:[prefix stringByAppendingString:message]];
}

// Records `value` in pdfDict under `key`. Silently skips nil values and keys
// that cannot be converted, because NSMutableDictionary throws on nil.
- (void)storeValue:(id)value forPDFKey:(const char *)key
{
    NSString *name = PDFHelperKeyString(key);
    if (value != nil && name != nil)
        [self.pdfDict setObject:value forKey:name];
}

// Runs the traversal callback over every entry of `dictionary`.
// A dictionary that is already being enumerated further up the call stack is
// skipped, which breaks reference cycles. This relies on CoreGraphics handing
// back the same CGPDFDictionaryRef each time an indirect object is resolved —
// TODO confirm; if it does not, the guard is simply a no-op.
- (void)enumerateDictionary:(CGPDFDictionaryRef)dictionary
{
    if (dictionary == NULL)
        return;

    NSValue *identity = [NSValue valueWithPointer:dictionary];
    if ([_dictionariesInProgress containsObject:identity])
        return;

    // The C callback finds its receiver through the global `selfClass`. Bind
    // it to this instance only while the traversal runs and restore the
    // previous value afterwards, so several helpers can coexist and none of
    // them is retained by the global once its traversal has finished.
    // Not thread-safe: traversals must not run concurrently.
    id previousReceiver = selfClass;
    selfClass = self;
    [_dictionariesInProgress addObject:identity];

    CGPDFDictionaryApplyFunction(dictionary, getDictionaryObjects, dictionary);

    [_dictionariesInProgress removeObject:identity];
    selfClass = previousReceiver;
}

// Traverses a nested dictionary one level deeper, unless it was reached
// through a back-reference key ("Parent" or "P").
- (void)descendIntoDictionary:(CGPDFDictionaryRef)dictionary referencedByKey:(const char *)key
{
    if (dictionary == NULL || PDFHelperIsBackReferenceKey(key))
        return;

    self.catalogLevel = self.catalogLevel + 1;
    [self enumerateDictionary:dictionary];
    self.catalogLevel = self.catalogLevel - 1;
}

// Handles one dictionary entry: logs it, stores scalar/array values in
// pdfDict under `key`, and recurses into nested dictionaries and streams.
- (void)recordObject:(CGPDFObjectRef)object
              forKey:(const char *)key
        inDictionary:(CGPDFDictionaryRef)contentDict
{
    [self logIndented:[NSString stringWithFormat:@"key: %s", key]];

    switch (CGPDFObjectGetType(object))
    {
        case kCGPDFObjectTypeNull:
        {
            [Log LogDebug:@"*****pdf null value"];
            break;
        }
        case kCGPDFObjectTypeBoolean:
        {
            CGPDFBoolean flag;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeBoolean, &flag))
            {
                NSNumber *number = [NSNumber numberWithBool:flag];
                [self logIndented:[NSString stringWithFormat:@"pdf boolean value: %@", number]];
                [self storeValue:number forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeInteger:
        {
            CGPDFInteger integer;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeInteger, &integer))
            {
                [self logIndented:[NSString stringWithFormat:@"pdf integer value: %ld", (long)integer]];
                // CGPDFInteger is a long; numberWithInt: would truncate it on 64-bit.
                [self storeValue:[NSNumber numberWithLong:integer] forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeReal:
        {
            CGPDFReal real;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeReal, &real))
            {
                // Keep the fractional part; the value used to be cast to an integer.
                [self logIndented:[NSString stringWithFormat:@"pdf real value: %g", (double)real]];
                [self storeValue:[NSNumber numberWithDouble:real] forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeName:
        {
            const char *name = NULL;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeName, &name) && name != NULL)
            {
                NSString *dictName = [[NSString alloc] initWithCString:name encoding:NSUTF8StringEncoding];
                if (dictName)
                {
                    [self logIndented:[NSString stringWithFormat:@"pdf name value: %@", dictName]];
                    [self storeValue:dictName forPDFKey:key];
                }
            }
            break;
        }
        case kCGPDFObjectTypeString:
        {
            CGPDFStringRef pdfString;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeString, &pdfString))
            {
                // The copy is owned by us; transfer it to ARC exactly once.
                NSString *text = (__bridge_transfer NSString *)CGPDFStringCopyTextString(pdfString);
                [self logIndented:[NSString stringWithFormat:@"pdf string value: %@", text]];
                [self storeValue:text forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeArray:
        {
            CGPDFArrayRef pdfArray;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeArray, &pdfArray))
            {
                NSArray *items = [self copyPDFArray:pdfArray
                              referencingDictionary:contentDict
                                     referencingKey:key];
                [self storeValue:items forPDFKey:key];
            }
            break;
        }
        case kCGPDFObjectTypeDictionary:
        {
            CGPDFDictionaryRef nested;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeDictionary, &nested))
                [self descendIntoDictionary:nested referencedByKey:key];
            break;
        }
        case kCGPDFObjectTypeStream:
        {
            CGPDFStreamRef stream;
            if (CGPDFObjectGetValue(object, kCGPDFObjectTypeStream, &stream))
            {
                NSData *data = PDFHelperStreamData(stream);
                // Debug dump of the most recently visited stream (overwritten each time).
                [data writeToFile:[FileHelpers pathInDocumentDirectory:@"data.dat"] atomically:YES];
                [self logIndented:[NSString stringWithFormat:@"pdf stream length: %ld - %@",
                                   (long)[data length], PDFHelperTextFromData(data)]];
                [self descendIntoDictionary:CGPDFStreamGetDictionary(stream) referencedByKey:key];
            }
            break;
        }
        default:
            break;
    }
}

#pragma mark - Traversal callback

// Called by CGPDFDictionaryApplyFunction once per dictionary entry.
// `info` is the dictionary being enumerated; the receiving helper is taken
// from the global `selfClass`, which -enumerateDictionary: binds for the
// duration of the traversal.
void getDictionaryObjects(const char *key, CGPDFObjectRef object, void *info)
{
    PDFHelper *helper = selfClass;
    if (helper == nil || key == NULL || object == NULL)
        return;
    [helper recordObject:object forKey:key inDictionary:(CGPDFDictionaryRef)info];
}

#pragma mark - Public

// Converts `arr` into an NSArray, logging every element.
// Booleans, integers, reals, names, strings and nested arrays are added to the
// result. Nested dictionaries and streams are not added; they are traversed so
// their contents land in pdfDict, unless `key` is a back-reference key.
// `dict` is the dictionary that holds `arr` and `key` the key it is stored
// under; both are passed through to nested arrays.
- (NSArray *)copyPDFArray:(CGPDFArrayRef)arr
    referencingDictionary:(CGPDFDictionaryRef)dict
           referencingKey:(const char *)key
{
    NSMutableArray *temp = [[NSMutableArray alloc] init];
    if (arr == NULL)
        return temp;

    size_t count = CGPDFArrayGetCount(arr);
    [self logIndented:[NSString stringWithFormat:@"pdf array count: %zu", count]];

    for (size_t index = 0; index < count; index++)
    {
        CGPDFObjectRef object = NULL;
        if (!CGPDFArrayGetObject(arr, index, &object) || object == NULL)
            continue;

        switch (CGPDFObjectGetType(object))
        {
            case kCGPDFObjectTypeNull:
            {
                [self logIndented:[NSString stringWithFormat:@"pdf array null(%zu)", index]];
                break;
            }
            case kCGPDFObjectTypeBoolean:
            {
                CGPDFBoolean flag;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeBoolean, &flag))
                {
                    NSNumber *number = [NSNumber numberWithBool:flag];
                    [self logIndented:[NSString stringWithFormat:@"pdf array boolean value(%zu): %@", index, number]];
                    [temp addObject:number];
                }
                break;
            }
            case kCGPDFObjectTypeInteger:
            {
                CGPDFInteger integer;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeInteger, &integer))
                {
                    [self logIndented:[NSString stringWithFormat:@"pdf array integer value(%zu): %ld", index, (long)integer]];
                    [temp addObject:[NSNumber numberWithLong:integer]];
                }
                break;
            }
            case kCGPDFObjectTypeReal:
            {
                CGPDFReal real;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeReal, &real))
                {
                    [self logIndented:[NSString stringWithFormat:@"pdf array real(%zu): %g", index, (double)real]];
                    [temp addObject:[NSNumber numberWithDouble:real]];
                }
                break;
            }
            case kCGPDFObjectTypeName:
            {
                // Read the name from the element itself. Looking `key` up in
                // `dict` can never yield a name here, because the value stored
                // under `key` is the array being converted.
                const char *name = NULL;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeName, &name) && name != NULL)
                {
                    NSString *elementName = [[NSString alloc] initWithCString:name encoding:NSUTF8StringEncoding];
                    if (elementName)
                    {
                        [self logIndented:[NSString stringWithFormat:@"pdf array name value(%zu): %@", index, elementName]];
                        [temp addObject:elementName];
                    }
                }
                break;
            }
            case kCGPDFObjectTypeString:
            {
                CGPDFStringRef pdfString;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeString, &pdfString))
                {
                    NSString *text = (__bridge_transfer NSString *)CGPDFStringCopyTextString(pdfString);
                    [self logIndented:[NSString stringWithFormat:@"pdf array string(%zu): %@", index, text]];
                    // addObject: throws on nil, and the copy can fail.
                    if (text)
                        [temp addObject:text];
                }
                break;
            }
            case kCGPDFObjectTypeArray:
            {
                CGPDFArrayRef nestedArray;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeArray, &nestedArray))
                {
                    NSArray *nestedItems = [self copyPDFArray:nestedArray
                                        referencingDictionary:dict
                                               referencingKey:key];
                    [temp addObject:nestedItems];
                }
                break;
            }
            case kCGPDFObjectTypeDictionary:
            {
                CGPDFDictionaryRef nested;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeDictionary, &nested))
                    [self descendIntoDictionary:nested referencedByKey:key];
                break;
            }
            case kCGPDFObjectTypeStream:
            {
                CGPDFStreamRef stream;
                if (CGPDFObjectGetValue(object, kCGPDFObjectTypeStream, &stream))
                {
                    NSData *data = PDFHelperStreamData(stream);
                    [self logIndented:[NSString stringWithFormat:@"pdf array stream length: (%zu): %ld - %@",
                                       index, (long)[data length], PDFHelperTextFromData(data)]];
                    [self descendIntoDictionary:CGPDFStreamGetDictionary(stream) referencedByKey:key];
                }
                break;
            }
            default:
                break;
        }
    }
    return temp;
}

// Returns the catalog of the document built from pdfData, or NULL when the
// data cannot be written out or parsed. The document is created once and
// cached; the returned dictionary stays valid until pdfData is replaced or
// this object is deallocated.
- (CGPDFDictionaryRef)getPdfDocDictionary
{
    if (_pdfDocument == NULL)
    {
        NSURL *pdf = [[NSURL alloc] initFileURLWithPath:[FileHelpers pathInLibraryDirectory:@"file.pdf"]];
        // Bail out if the write fails (including nil pdfData); otherwise a
        // stale file left over from an earlier run would be parsed instead.
        if (![_pdfData writeToFile:[pdf path] atomically:YES])
            return NULL;
        _pdfDocument = CGPDFDocumentCreateWithURL((__bridge CFURLRef)pdf);
    }
    return _pdfDocument ? CGPDFDocumentGetCatalog(_pdfDocument) : NULL;
}

// Traverses the catalog's "AcroForm" dictionary (when present) and returns
// the array recorded under "XFA", or nil when no array was recorded there.
- (NSArray *)getFormFields
{
    CGPDFDictionaryRef catalog = [self getPdfDocDictionary];
    CGPDFDictionaryRef acroForm = NULL;
    if (catalog != NULL && CGPDFDictionaryGetDictionary(catalog, "AcroForm", &acroForm))
        [self enumerateDictionary:acroForm];

    // pdfDict is keyed by bare key names, so "XFA" may hold something other
    // than an array; never hand a non-array back typed as NSArray.
    id xfa = [self.pdfDict objectForKey:@"XFA"];
    return [xfa isKindOfClass:[NSArray class]] ? xfa : nil;
}

// Traverses the whole document catalog into pdfDict and returns the catalog
// (NULL when the document could not be opened).
- (CGPDFDictionaryRef)getDocumentCatalog
{
    CGPDFDictionaryRef docCatalog = [self getPdfDocDictionary];
    [self enumerateDictionary:docCatalog];
    return docCatalog;
}

@end