我正在使用 PDFClown 突出显示 PDF 文档中的多个搜索词。在许多包含彩色图像、复杂图表、彩色文本的 pdf 文档中,PDFClown 会在那里抛出异常并且无法突出显示匹配的单词。提到的代码适用于普通或简单的 Pdf。
这是我用于测试的 PDF https://drive.google.com/file/d/0B-nuOO6Zsa4rXy1DS2JjX1RnYmM/view?usp=sharing
public void searchWordInPdf(DocumentMetadata documentMetadata , String searchWord,
HttpServletResponse response)throws IOException{
try {
byte[] bytes = null;
org.pdfclown.files.File file =null;
if (documentMetadata.getProject().getFtpId() != null && documentMetadata.getProject().getFtpId() > 0) {
FtpServer ftpServer = ftpServerService.getFtpServer(documentMetadata.getProject().getFtpId());
ByteArrayOutputStream bos = new ByteArrayOutputStream();
retrievePdfFile(ftpServer, bos, documentMetadata.getFilePath());
bytes = bos.toByteArray();
file = new org.pdfclown.files.File(bytes);
}else{
file = new org.pdfclown.files.File(documentMetadata.getFilePath());
}
List<String> matchList = new ArrayList<String>();
//Pattern regex = Pattern.compile("[^\\s\"']+|\"([^\"]*)\"|'([^']*)'");
Pattern regex = Pattern.compile("[^\\s\"']+|\"([^\"]*)\"|'([^']*)'");
Matcher regexMatcher = regex.matcher(searchWord);
while (regexMatcher.find()) {
if (regexMatcher.group(1) != null) {
// Add double-quoted string without the quotes
matchList.add(regexMatcher.group(1));
} else if (regexMatcher.group(2) != null) {
// Add single-quoted string without the quotes
matchList.add(regexMatcher.group(2));
} else {
// Add unquoted word
matchList.add(regexMatcher.group());
}
}
for (String key : matchList){
Pattern pattern = Pattern.compile(key, Pattern.CASE_INSENSITIVE);
// 2. Iterating through the document pages...
TextExtractor textExtractor = new TextExtractor(true, true);
for (final Page page : file.getDocument().getPages()) {
System.out.println("\nScanning page " + (page.getIndex() + 1) + "...\n");
// 2.1. Extract the page text!
Map<Rectangle2D, List<ITextString>> textStrings = textExtractor.extract(page);
// 2.2. Find the text pattern matches!
final Matcher matcher = pattern.matcher(TextExtractor.toString(textStrings));
// 2.3. Highlight the text pattern matches!
textExtractor.filter(
textStrings,
new TextExtractor.IIntervalFilter() {
@Override
public boolean hasNext() {
if (matcher.find()) {
//count++;
return true;
}
return false;
}
@Override
public Interval<Integer> next() {
return new Interval<Integer>(matcher.start(), matcher.end());
}
@Override
public void process(
Interval<Integer> interval,
ITextString match
) {
Rectangle2D textBox = null;
// Defining the highlight box of the text pattern match...
List<Quad> highlightQuads = new ArrayList<Quad>();
{
/*
NOTE: A text pattern match may be split across multiple contiguous lines,
so we have to define a distinct highlight box for each text chunk.
*/
for (TextChar textChar : match.getTextChars()) {
Rectangle2D textCharBox = textChar.getBox();
if (textBox == null) {
textBox = (Rectangle2D) textCharBox.clone();
} else {
if (textCharBox.getY() > textBox.getMaxY()) {
highlightQuads.add(Quad.get(textBox));
textBox = (Rectangle2D) textCharBox.clone();
} else {
textBox.add(textCharBox);
}
}
}
highlightQuads.add(Quad.get(textBox));
}
// Highlight the text pattern match!
new TextMarkup(page, highlightQuads, null, MarkupTypeEnum.Highlight);
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
);
}
}
String contentType = getContentType(documentMetadata.getFileName());
if (contentType == null) {
contentType = "binary/octet-stream";
}
response.setStatus(HttpStatus.OK.value());
ByteArrayOutputStream output = new ByteArrayOutputStream();
if(output != null){
file.save(output, SerializationModeEnum.Standard );
bytes = org.springframework.security.crypto.codec.Base64.encode(output.toByteArray());
response.addHeader("Content-Disposition", "attachment; filename=" + documentMetadata.getFileName());
response.addHeader("Content-Type", contentType);
response.getOutputStream().write(bytes);
}
} catch (Exception e) {
e.printStackTrace();
}
}
这是堆栈跟踪
java.lang.IllegalArgumentException: Comparison method violates its general contract!
at java.util.TimSort.mergeLo(TimSort.java:777)
at java.util.TimSort.mergeAt(TimSort.java:514)
at java.util.TimSort.mergeCollapse(TimSort.java:439)
at java.util.TimSort.sort(TimSort.java:245)
at java.util.Arrays.sort(Arrays.java:1512)
at java.util.ArrayList.sort(ArrayList.java:1454)
at java.util.Collections.sort(Collections.java:175)
at org.pdfclown.tools.TextExtractor.sort(TextExtractor.java:675)
at org.pdfclown.tools.TextExtractor.extract(TextExtractor.java:306)
at nu.optimise.projectweb.service.DocumentMetadataService.searchWordInPdf(DocumentMetadataService.java:2669)
at nu.optimise.projectweb.service.DocumentMetadataService$$FastClassBySpringCGLIB$$fc6434c2.invoke(<generated>)
at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:204)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:720)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:157)
at org.springframework.aop.aspectj.MethodInvocationProceedingJoinPoint.proceed(MethodInvocationProceedingJoinPoint.java:85)
at nu.optimise.projectweb.aop.logging.LoggingAspect.logAround(LoggingAspect.java:51)
at sun.reflect.GeneratedMethodAccessor186.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.springframework.aop.aspectj.AbstractAspectJAdvice.invokeAdviceMethodWithGivenArgs(AbstractAspectJAdvice.java:620)
at org.springframework.aop.aspectj.AbstractAspectJAdvice.invokeAdviceMethod(AbstractAspectJAdvice.java:609)
at org.springframework.aop.aspectj.AspectJAroundAdvice.invoke(AspectJAroundAdvice.java:68)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
at org.springframework.aop.aspectj.AspectJAfterThrowingAdvice.invoke(AspectJAfterThrowingAdvice.java:59)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
at org.springframework.transaction.interceptor.TransactionInterceptor$1.proceedWithInvocation(TransactionInterceptor.java:99)
at org.springframework.transaction.interceptor.TransactionAspectSupport.invokeWithinTransaction(TransactionAspectSupport.java:281)
at org.springframework.transaction.interceptor.TransactionInterceptor.invoke(TransactionInterceptor.java:96)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
at org.springframework.aop.interceptor.ExposeInvocationInterceptor.invoke(ExposeInvocationInterceptor.java:92)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
at org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:655)
at nu.optimise.projectweb.service.DocumentMetadataService$$EnhancerBySpringCGLIB$$c3a15a18.searchWordInPdf(<generated>)
at nu.optimise.projectweb.web.rest.DocumentMetadataResource.searchContentPDF(DocumentMetadataResource.java:1026)
at nu.optimise.projectweb.web.rest.DocumentMetadataResource$$FastClassBySpringCGLIB$$bb12eea8.invoke(<generated>)
at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:204)
at org.springframework.aop.framework.CglibAopProxy$CglibMethodInvocation.invokeJoinpoint(CglibAopProxy.java:720)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:157)
at org.springframework.aop.aspectj.MethodInvocationProceedingJoinPoint.proceed(MethodInvocationProceedingJoinPoint.java:85)
at nu.optimise.projectweb.aop.logging.LoggingAspect.logAround(LoggingAspect.java:51)
at sun.reflect.GeneratedMethodAccessor186.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.springframework.aop.aspectj.AbstractAspectJAdvice.invokeAdviceMethodWithGivenArgs(AbstractAspectJAdvice.java:620)
at org.springframework.aop.aspectj.AbstractAspectJAdvice.invokeAdviceMethod(AbstractAspectJAdvice.java:609)
at org.springframework.aop.aspectj.AspectJAroundAdvice.invoke(AspectJAroundAdvice.java:68)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
at org.springframework.aop.aspectj.AspectJAfterThrowingAdvice.invoke(AspectJAfterThrowingAdvice.java:59)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
at org.springframework.aop.interceptor.ExposeInvocationInterceptor.invoke(ExposeInvocationInterceptor.java:92)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
at com.ryantenney.metrics.spring.TimedMethodInterceptor.invoke(TimedMethodInterceptor.java:48)
at com.ryantenney.metrics.spring.TimedMethodInterceptor.invoke(TimedMethodInterceptor.java:34)
at com.ryantenney.metrics.spring.AbstractMetricMethodInterceptor.invoke(AbstractMetricMethodInterceptor.java:59)
at org.springframework.aop.framework.ReflectiveMethodInvocation.proceed(ReflectiveMethodInvocation.java:179)
at org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:655)
at nu.optimise.projectweb.web.rest.DocumentMetadataResource$$EnhancerBySpringCGLIB$$bfe48b3d.searchContentPDF(<generated>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.springframework.web.method.support.InvocableHandlerMethod.doInvoke(InvocableHandlerMethod.java:221)
at org.springframework.web.method.support.InvocableHandlerMethod.invokeForRequest(InvocableHandlerMethod.java:136)
at org.springframework.web.servlet.mvc.method.annotation.ServletInvocableHandlerMethod.invokeAndHandle(ServletInvocableHandlerMethod.java:110)
at org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.invokeHandlerMethod(RequestMappingHandlerAdapter.java:832)
at org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.handleInternal(RequestMappingHandlerAdapter.java:743)
at org.springframework.web.servlet.mvc.method.AbstractHandlerMethodAdapter.handle(AbstractHandlerMethodAdapter.java:85)
at org.springframework.web.servlet.DispatcherServlet.doDispatch(DispatcherServlet.java:961)
at org.springframework.web.servlet.DispatcherServlet.doService(DispatcherServlet.java:895)
at org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:967)
at org.springframework.web.servlet.FrameworkServlet.doGet(FrameworkServlet.java:858)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:622)
at org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:843)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:729)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:292)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.apache.tomcat.websocket.server.WsFilter.doFilter(WsFilter.java:52)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at com.codahale.metrics.servlet.AbstractInstrumentedFilter.doFilter(AbstractInstrumentedFilter.java:104)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.springframework.boot.actuate.autoconfigure.EndpointWebMvcAutoConfiguration$ApplicationContextHeaderFilter.doFilterInternal(EndpointWebMvcAutoConfiguration.java:281)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.springframework.boot.actuate.trace.WebRequestTraceFilter.doFilterInternal(WebRequestTraceFilter.java:115)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:317)
at org.springframework.security.web.access.intercept.FilterSecurityInterceptor.invoke(FilterSecurityInterceptor.java:127)
at org.springframework.security.web.access.intercept.FilterSecurityInterceptor.doFilter(FilterSecurityInterceptor.java:91)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.access.ExceptionTranslationFilter.doFilter(ExceptionTranslationFilter.java:115)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.session.SessionManagementFilter.doFilter(SessionManagementFilter.java:137)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.authentication.AnonymousAuthenticationFilter.doFilter(AnonymousAuthenticationFilter.java:112)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.servletapi.SecurityContextHolderAwareRequestFilter.doFilter(SecurityContextHolderAwareRequestFilter.java:169)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.savedrequest.RequestCacheAwareFilter.doFilter(RequestCacheAwareFilter.java:63)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at nu.optimise.projectweb.security.jwt.JWTFilter.doFilter(JWTFilter.java:43)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.authentication.logout.LogoutFilter.doFilter(LogoutFilter.java:121)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.header.HeaderWriterFilter.doFilterInternal(HeaderWriterFilter.java:66)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.context.SecurityContextPersistenceFilter.doFilter(SecurityContextPersistenceFilter.java:106)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.context.request.async.WebAsyncManagerIntegrationFilter.doFilterInternal(WebAsyncManagerIntegrationFilter.java:56)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:331)
at org.springframework.security.web.FilterChainProxy.doFilterInternal(FilterChainProxy.java:214)
at org.springframework.security.web.FilterChainProxy.doFilter(FilterChainProxy.java:177)
at org.springframework.web.filter.DelegatingFilterProxy.invokeDelegate(DelegatingFilterProxy.java:346)
at org.springframework.web.filter.DelegatingFilterProxy.doFilter(DelegatingFilterProxy.java:262)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.springframework.web.filter.RequestContextFilter.doFilterInternal(RequestContextFilter.java:99)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.springframework.web.filter.HttpPutFormContentFilter.doFilterInternal(HttpPutFormContentFilter.java:87)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.springframework.web.filter.HiddenHttpMethodFilter.doFilterInternal(HiddenHttpMethodFilter.java:77)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.springframework.web.filter.CharacterEncodingFilter.doFilterInternal(CharacterEncodingFilter.java:121)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:240)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:207)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:212)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:106)
at org.apache.catalina.authenticator.AuthenticatorBase.invoke(AuthenticatorBase.java:502)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:141)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:79)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:88)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:528)
at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1099)
at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:670)
at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun(NioEndpoint.java:1520)
at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.run(NioEndpoint.java:1476)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run(TaskThread.java:61)
at java.lang.Thread.run(Thread.java:748)