如果您打算在 Java 服务器上进行清理,我建议使用 OWASP HTML Sanitizer,它显然基于 Caja 的代码。
它还能够在清理 <a> 元素时为其添加 rel="nofollow" 属性。
import org.owasp.html.PolicyFactory;
import static org.owasp.html.Sanitizers.BLOCKS;
import static org.owasp.html.Sanitizers.FORMATTING;
import static org.owasp.html.Sanitizers.IMAGES;
import static org.owasp.html.Sanitizers.LINKS;
PolicyFactory sanitiser = BLOCKS.and(FORMATTING).and(IMAGES).and(LINKS);
String htmlSanitised = sanitiser.sanitize(htmlSource)
不过,如果要从 Java 中调用 Caja,下面的代码适用于 Rhino (Java 7) 和 Nashorn (Java 8):
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import javax.script.Bindings;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
/**
 * Sanitises HTML by evaluating the Caja JavaScript sanitizer inside a
 * {@code javax.script} engine.
 *
 * <p>Each instance holds one engine and one set of engine-scope bindings, and
 * every call to {@link #sanitise(String)} overwrites the shared {@code src}
 * binding. An instance is therefore not suitable for concurrent use without
 * external synchronisation.
 */
public class CajaSanitiser {

    /** Classpath location of the minified Caja HTML/CSS sanitizer script. */
    private static final String SCRIPT_NAME =
            "com/google/caja/plugin/html-css-sanitizer-minified.js";

    private final ScriptEngine engine;
    private final Bindings bindings;

    /**
     * Looks up the {@code "js"} script engine, loads the Caja sanitizer script
     * into it and defines the {@code identity} helper used by
     * {@link #sanitise(String)}.
     *
     * @throws IllegalStateException if no script engine is registered under the name {@code "js"}
     * @throws IOException if the sanitizer script is not on the classpath or cannot be read
     * @throws ScriptException if the engine fails to evaluate the script
     */
    public CajaSanitiser() throws IOException, ScriptException {
        ScriptEngine jsEngine = new ScriptEngineManager().getEngineByName("js");
        if (jsEngine == null) {
            // getEngineByName returns null when no matching engine is installed;
            // fail here with a clear message instead of a NullPointerException below.
            throw new IllegalStateException(
                    "No script engine named \"js\" is available in this Java runtime");
        }
        this.engine = jsEngine;
        this.bindings = jsEngine.getBindings(ScriptContext.ENGINE_SCOPE);

        try (BufferedReader reader = getReader(SCRIPT_NAME)) {
            engine.eval(reader);
        }

        String identity = "function identity(value) {return value;}";
        engine.eval(identity);
    }

    /**
     * Opens a classpath resource as a UTF-8 buffered reader.
     *
     * @param name resource path, resolved through this class's class loader
     * @return a reader over the resource; the caller is responsible for closing it
     * @throws IOException if the resource cannot be found
     */
    private BufferedReader getReader(String name) throws IOException {
        InputStream stream = getClass().getClassLoader().getResourceAsStream(name);
        if (stream == null) {
            throw new IOException("Resource not found on classpath: " + name);
        }
        // Decode explicitly as UTF-8 rather than relying on the platform default charset.
        return new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
    }

    /**
     * Runs {@code html_sanitize} over the given HTML.
     *
     * @param htmlSource the HTML to sanitise
     * @return the value returned by {@code html_sanitize}, cast to {@code String}
     * @throws ScriptException if the script engine reports an error
     */
    public String sanitise(String htmlSource) throws ScriptException {
        // Pass the input through a binding instead of splicing it into the script text.
        bindings.put("src", htmlSource);
        // You can use other functions beside 'identity' if you
        // want to transform the html.
        // See https://code.google.com/p/google-caja/wiki/JsHtmlSanitizer
        return (String) engine.eval("html_sanitize(src, identity, identity)");
    }

    /**
     * Demo entry point: prints a small HTML document containing a style block,
     * followed by its sanitised form.
     *
     * @param args unused
     * @throws Exception if the sanitiser cannot be created or the script fails
     */
    public static void main(String[] args) throws Exception {
        CajaSanitiser sanitiser = new CajaSanitiser();
        String source = "<html>\n" +
                "<head>\n" +
                "<style>\n" +
                "h1 {color:blue;}\n" +
                "</style>\n" +
                "</head>\n" +
                "<body>\n" +
                "<h1>A heading</h1>\n" +
                "</body>\n" +
                "</html>";
        System.out.println("Original HTML with CSS:");
        System.out.println(source);
        System.out.println();
        System.out.println("Sanitised HTML:");
        System.out.println(sanitiser.sanitise(source));
    }
}
我将其用作 Maven 配置的一部分:
<!-- Declares the Caja artifact (caja:caja, version r5127) as a project dependency. -->
<dependencies>
<dependency>
<groupId>caja</groupId>
<artifactId>caja</artifactId>
<version>r5127</version>
</dependency>
</dependencies>
<!-- Extra repository from which the Caja artifact above is resolved. -->
<!-- NOTE(review): the URL uses plain http and points at Google Code hosting; newer Maven
     releases may block http repositories by default and the host may no longer be served.
     Confirm before relying on it. -->
<repositories>
<repository>
<id>caja</id>
<name>caja</name>
<url>http://google-caja.googlecode.com/svn/maven</url>
</repository>
</repositories>