我正在开发一个 Servlet 过滤器,以使我的 GWT 应用程序可以被搜索引擎抓取。思路是:当过滤器遇到下面这样一个"丑陋"的 URL 时:
http://www.myapp.com/?_escaped_fragment_=v;id=Mv67mC13Yizr
就呈现下面这个"漂亮"的 URL 所对应的页面:
http://www.myapp.com/#!v;id=Mv67mC13Yizr
但是,代码永远不会到达 doFilter()。为什么?
Web.xml
<!-- Registers Guice's servlet filter under the name "guiceFilter". -->
<filter>
    <filter-name>guiceFilter</filter-name>
    <filter-class>com.google.inject.servlet.GuiceFilter</filter-class>
</filter>
<!-- Maps "guiceFilter" to the URL pattern /*. -->
<filter-mapping>
    <filter-name>guiceFilter</filter-name>
    <url-pattern>/*</url-pattern>
</filter-mapping>
DispatchServletModule.java
/**
 * Servlet module that binds the dispatch service servlet and the crawler
 * filter to their URL patterns.
 */
public class DispatchServletModule extends ServletModule {

    /**
     * Binds {@link DispatchServiceImpl} to the default service path and
     * routes the pattern {@code "/"} through {@link CrawlerServiceImpl}.
     */
    @Override
    public void configureServlets() {
        final String dispatchPath = "/" + ActionImpl.DEFAULT_SERVICE_NAME;
        serve(dispatchPath).with(DispatchServiceImpl.class);

        // NOTE(review): the pattern "/" presumably matches only the exact
        // root path rather than every URL -- confirm against the Guice
        // servlet pattern-matching rules if the filter is expected to see
        // requests for other paths.
        filter("/").through(CrawlerServiceImpl.class);
    }
}
CrawlerServiceImpl.java
/**
 * Servlet filter that answers requests whose query string contains
 * {@code _escaped_fragment_=} with an HTML snapshot of the corresponding
 * {@code #!} URL, rendered through a {@link WebClient}. All other requests
 * are passed down the filter chain untouched.
 */
@Singleton
public final class CrawlerServiceImpl implements Filter {
    private static final String ESCAPED_FRAGMENT_FORMAT1 = "_escaped_fragment_=";
    private static final int ESCAPED_FRAGMENT_LENGTH1 = ESCAPED_FRAGMENT_FORMAT1.length();
    private static final String ESCAPED_FRAGMENT_FORMAT2 = "&" + ESCAPED_FRAGMENT_FORMAT1;
    private static final int ESCAPED_FRAGMENT_LENGTH2 = ESCAPED_FRAGMENT_FORMAT2.length();

    // Optional injection: stays null when no Provider<WebClient> is bound, in
    // which case doFilter() creates its own client. The field is deliberately
    // not final so that the injector is able to assign it.
    @Inject(optional = true)
    private Provider<WebClient> webClientProvider;

    @Override
    public void init(FilterConfig filterConfig) throws ServletException {
        // No initialisation required.
    }

    @Override
    public void destroy() {
        // No resources held between requests.
    }

    /**
     * Serves a rendered snapshot for {@code _escaped_fragment_} requests and
     * delegates every other request to {@code chain}.
     *
     * @param request  the incoming request; cast to {@link HttpServletRequest}
     * @param response the outgoing response; cast to {@link HttpServletResponse}
     * @param chain    the remaining filter chain
     * @throws IOException      if fetching the page or writing the response fails
     * @throws ServletException if rendering the page fails with a runtime error
     */
    @Override
    public void doFilter(ServletRequest request, ServletResponse response,
            FilterChain chain) throws IOException, ServletException {
        HttpServletRequest req = (HttpServletRequest) request;
        HttpServletResponse res = (HttpServletResponse) response;
        String queryString = req.getQueryString();

        if (queryString == null || !queryString.contains(ESCAPED_FRAGMENT_FORMAT1)) {
            chain.doFilter(request, response);
            return;
        }

        String pageName = buildPageUrl(req, queryString);

        WebClient webClient;
        if (webClientProvider == null) {
            webClient = new WebClient(BrowserVersion.FIREFOX_3_6);
        } else {
            webClient = webClientProvider.get();
        }

        try {
            webClient.setThrowExceptionOnScriptError(false);
            webClient.setJavaScriptEnabled(true);
            HtmlPage page = webClient.getPage(pageName);

            // pageName contains the URL-decoded fragment supplied by the
            // caller, so it must be escaped before being echoed into HTML.
            String safePageName = escapeHtml(pageName);

            res.setContentType("text/html;charset=UTF-8");
            PrintWriter out = res.getWriter();
            out.println("<hr />");
            out.println("<center><h3>You are viewing a non-interactive page that is intended for the crawler. "
                    + "You probably want to see this page: <a href=\""
                    + safePageName
                    + "\">"
                    + safePageName + "</a></h3></center>");
            out.println("<hr />");
            out.println(page.asXml());
            out.println("");
            out.close();
        } catch (IOException e) {
            // Already permitted by the method signature; let the container handle it.
            throw e;
        } catch (Exception e) {
            // Surface rendering failures instead of silently returning an empty page.
            throw new ServletException("Failed to render crawler snapshot for " + pageName, e);
        } finally {
            // Release the client's windows even when rendering fails.
            webClient.closeAllWindows();
        }
    }

    /**
     * Builds the absolute {@code #!} URL for the request: scheme, server
     * name, port (when non-zero), request URI and the rewritten query string.
     */
    private String buildPageUrl(HttpServletRequest req, String queryString)
            throws UnsupportedEncodingException {
        StringBuilder url = new StringBuilder(req.getScheme()).append("://");
        url.append(req.getServerName());
        if (req.getServerPort() != 0) {
            url.append(":");
            url.append(req.getServerPort());
        }
        url.append(req.getRequestURI());
        url.append(rewriteQueryString(queryString));
        return url.toString();
    }

    /**
     * Converts a query string containing {@code _escaped_fragment_=value}
     * into {@code [?prefix]#!decodedValue}. Everything before the marker is
     * kept as the query string; everything after it is URL-decoded as UTF-8
     * and becomes the fragment. Returns the input unchanged when the marker
     * is absent.
     */
    private String rewriteQueryString(String queryString) throws UnsupportedEncodingException {
        // Prefer the "&"-prefixed form so the separator is not left dangling
        // at the end of the preserved query prefix.
        int index = queryString.indexOf(ESCAPED_FRAGMENT_FORMAT2);
        int length = ESCAPED_FRAGMENT_LENGTH2;
        if (index == -1) {
            index = queryString.indexOf(ESCAPED_FRAGMENT_FORMAT1);
            length = ESCAPED_FRAGMENT_LENGTH1;
        }
        if (index == -1) {
            return queryString;
        }

        StringBuilder rewritten = new StringBuilder();
        if (index > 0) {
            rewritten.append("?");
            rewritten.append(queryString.substring(0, index));
        }
        rewritten.append("#!");
        rewritten.append(URLDecoder.decode(queryString.substring(index + length), "UTF-8"));
        return rewritten.toString();
    }

    /** Escapes the characters that are significant in HTML text and attribute values. */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }
}