我正在寻找一个可以直观地显示结构、字符/单词和样式差异的 HTML 差异比较 API。该工具还必须支持双字节字符,并且足够灵活,以便我将其集成到现有网站中,轻松显示比较结果。我目前使用的是 Component Software 的 COM 实现,它不支持双字节字符,并且已有大约六年没有更新。
问问题
2094 次
2 回答
0
我发现可以做类似事情的唯一两个工具是http://changedetection.com和http://imnosy.com。两者都让您指定一个 url 并观察它们的变化。
于 2013-01-23T15:28:20.970 回答
0
这是我使用的:
[http://code.google.com/p/google-diff-match-patch/][1]
我必须编写自己的方法来进行比较,但经过一些工作后效果看起来不错。此实现比较的是传入的文本,因此如果您只是比较 2 个文本字符串,它可以正常工作。我把 diff_prettyHtml 方法改成了:
/// <summary>
/// Renders a list of diffs as an HTML fragment for display: inserted text is
/// wrapped in an ins element, deleted text in a del element and unchanged
/// text in a span.
/// </summary>
/// <param name="diffs">The diffs to render, in order.</param>
/// <returns>The HTML fragment.</returns>
public string diff_prettyHtml(List<Diff> diffs)
{
    StringBuilder html = new StringBuilder();
    foreach (Diff aDiff in diffs)
    {
        // The diff text is plain text, so it has to be HTML-escaped before it
        // is placed inside markup. '&' is escaped first so the entities
        // produced by the following replacements are not escaped a second time.
        string text = aDiff.text.Replace("&", "&amp;").Replace("<", "&lt;")
            .Replace(">", "&gt;").Replace("\n", "<br>");
        switch (aDiff.operation)
        {
            case Operation.INSERT:
                html.Append("<ins class='diff'>").Append(text)
                    .Append("</ins>");
                break;
            case Operation.DELETE:
                html.Append("<del class='diff'>").Append(text)
                    .Append("</del>");
                break;
            case Operation.EQUAL:
                html.Append("<span>").Append(text).Append("</span>");
                break;
        }
    }
    return html.ToString();
}
现在,如果您想对 2 个 html 字符串进行比较预览,这有点不同。这就是我所做的:
// Diff the two HTML strings, then render the result with the preview
// renderer (diff_previewHtml), which leaves the markup unescaped.
var engine = new DiffMatchPatch.diff_match_patch();
List<DiffMatchPatch.Diff> changes = engine.diff_main(oldHtml, newHtml);
return engine.diff_previewHtml(changes);
/// <summary>
/// Renders a list of diffs as HTML without escaping the diff text: inserted
/// text is wrapped in an ins element, deleted text in a del element, and
/// unchanged text is emitted as-is.
/// </summary>
/// <param name="diffs">The diffs to render, in order.</param>
/// <returns>The concatenated HTML.</returns>
public string diff_previewHtml(List<Diff> diffs) {
    var output = new StringBuilder();
    foreach (Diff piece in diffs) {
        string fragment = piece.text;
        if (piece.operation == Operation.INSERT) {
            output.Append("<ins class='diff'>");
            output.Append(fragment);
            output.Append("</ins>");
        } else if (piece.operation == Operation.DELETE) {
            output.Append("<del class='diff'>");
            output.Append(fragment);
            output.Append("</del>");
        } else if (piece.operation == Operation.EQUAL) {
            // Unchanged text is passed through untouched.
            output.Append(fragment);
        }
    }
    return output.ToString();
}
Unicoder 类如下:
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Linq;
namespace HtmlCompare
{
/// <summary>
/// Replaces every HTML tag in a document with a single placeholder character
/// so that a character-based text diff can be run over markup, and afterwards
/// turns the placeholders back into tags and tidies up ins/del wrappers that
/// ended up around tags.
/// </summary>
/// <remarks>
/// The tag/placeholder table is instance state: the documents being compared
/// and the final <see cref="plain2html"/> call have to go through the same
/// instance.
/// </remarks>
class Unicoder
{
    // Placeholders are taken from the Basic Multilingual Plane Private Use
    // Area so they do not collide with real text. The previous starting point,
    // U+AC00, is the first Hangul syllable, which corrupted Korean content.
    private const int _privateUseFirst = 0xE000;
    private const int _privateUseLast = 0xF8FF;

    // U+AC00: the range placeholders used to be taken from. It is only used
    // once the Private Use Area has been exhausted, so very large inputs keep
    // working as they did before.
    private const int _fallbackFirst = 44032;

    // Matches one tag: '<', anything that is not '>', then '>'.
    private const string _htmlPattern = @"<(S*?)[^>]*>.*?|<.*?\/>";
    private const RegexOptions _regexOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

    // Tag text -> placeholder, and the reverse lookup used by plain2html.
    private readonly Dictionary<string, string> _htmlHash = new Dictionary<string, string>();
    private readonly Dictionary<char, string> _tagsByPlaceholder = new Dictionary<char, string>();

    // Tag types that always stay inside the ins/del wrapper.
    private readonly HashSet<string> _blockElements = new HashSet<string> { "img", "br" };

    private int _currentHash = _privateUseFirst;

    /// <summary>
    /// Returns the placeholder for <paramref name="tag"/>, allocating a new one
    /// the first time a given tag text is seen.
    /// </summary>
    /// <param name="tag">The exact tag text, e.g. <c>&lt;p class="x"&gt;</c>.</param>
    /// <returns>A one-character string standing in for the tag.</returns>
    public string pushHash(string tag)
    {
        string placeholder;
        if (!_htmlHash.TryGetValue(tag, out placeholder))
        {
            placeholder = char.ConvertFromUtf32(_currentHash);
            _htmlHash[tag] = placeholder;
            _tagsByPlaceholder[placeholder[0]] = tag;
            _currentHash = (_currentHash == _privateUseLast) ? _fallbackFirst : _currentHash + 1;
        }
        return placeholder;
    }

    /// <summary>
    /// Replaces every tag in <paramref name="html"/> with its placeholder.
    /// </summary>
    /// <param name="html">The markup to encode.</param>
    /// <returns>The text with one placeholder character per tag.</returns>
    public string html2plain(string html)
    {
        return Regex.Replace(html, _htmlPattern, match => pushHash(match.Value), _regexOptions);
    }

    /// <summary>
    /// Turns placeholders back into their tags and then cleans up ins/del
    /// wrappers via <see cref="CleanUpMisplacedDiffTags"/>.
    /// </summary>
    /// <param name="plain">Text containing placeholders produced by this instance.</param>
    /// <returns>The restored HTML.</returns>
    public string plain2html(string plain)
    {
        if (plain == null)
            throw new System.ArgumentNullException("plain");

        // A single literal pass: the tag text is never treated as a regex
        // replacement pattern (so '$' sequences inside a tag survive), and the
        // result does not depend on the order of the table.
        StringBuilder html = new StringBuilder(plain.Length);
        foreach (char current in plain)
        {
            string tag;
            if (_tagsByPlaceholder.TryGetValue(current, out tag))
                html.Append(tag);
            else
                html.Append(current);
        }
        return CleanUpMisplacedDiffTags(html.ToString());
    }

    /// <summary>
    /// Rewrites every ins/del element in <paramref name="html"/> so that tags
    /// without a partner inside the element are moved outside of it.
    /// </summary>
    /// <param name="html">HTML that may contain ins/del elements wrapping tags.</param>
    /// <returns>The cleaned-up HTML.</returns>
    public string CleanUpMisplacedDiffTags(string html)
    {
        // NOTE(review): with RegexOptions.Multiline '.' still does not match a
        // line break, so an ins/del element whose content spans lines is not
        // matched here; RegexOptions.Singleline may have been intended - confirm.
        return Regex.Replace(html, @"(\<((ins|del).*?)\>)(.*?)(\<\/((ins|del).*?)\>)", DiffTagMatch, _regexOptions);
    }

    // Handles one ins/del element: group 1 is the opening tag, group 4 the
    // content and group 5 the closing tag.
    private string DiffTagMatch(Match tag)
    {
        string tagStart = tag.Groups[1].Value;
        string tagEnd = tag.Groups[5].Value;
        string contents = tag.Groups[4].Value;

        if (string.IsNullOrEmpty(contents))
            return string.Empty; // we don't want the ins/del tag around nothing

        if (string.IsNullOrWhiteSpace(contents)) // spacing should be kept
            return string.Concat(tagStart, "&nbsp;", contents.Replace("\n", "<br />"), tagEnd);

        return ProcessDiffTag(tagStart, tagEnd, contents);
    }

    // Splits the content of one ins/del element into text and tag parts, in
    // document order, and hands them to GoThroughDiffParts.
    private string ProcessDiffTag(string tagStart, string tagEnd, string contents)
    {
        MatchCollection matches = Regex.Matches(contents, _htmlPattern, _regexOptions);
        if (matches.Count == 0)
            return string.Concat(tagStart, contents, tagEnd);

        List<TagDefinition> parts = new List<TagDefinition>();
        int cursor = 0;
        foreach (Match currentMatch in matches)
        {
            // text between the previous tag (or the start) and this tag
            if (currentMatch.Index > cursor)
                AddTagDefinition(parts, CreateTextPart(contents.Substring(cursor, currentMatch.Index - cursor)));

            AddTagDefinition(parts, CreateTagPart(currentMatch.Value));
            cursor = currentMatch.Index + currentMatch.Length;
        }

        // Text after the last tag. This is done after the loop so it is kept
        // even when text also preceded the last tag.
        if (cursor < contents.Length)
            AddTagDefinition(parts, CreateTextPart(contents.Substring(cursor)));

        return GoThroughDiffParts(parts, tagStart, tagEnd);
    }

    // Builds [unmatched opening tags] startTag [everything else] endTag
    // [unmatched closing tags]. Text, block elements and tags whose partner is
    // also inside the element stay between startTag and endTag.
    private string GoThroughDiffParts(List<TagDefinition> parts, string startTag, string endTag)
    {
        StringBuilder before = new StringBuilder();
        StringBuilder middle = new StringBuilder();
        StringBuilder after = new StringBuilder();

        foreach (TagDefinition part in parts)
        {
            bool staysInside = !part.Tag
                || part.MatchingIndex != -1
                || _blockElements.Contains(part.TagType);

            if (staysInside)
                middle.Append(part.Text);
            else if (part.OpeningTag)
                before.Append(part.Text);
            else
                after.Append(part.Text);
        }

        string inner = middle.ToString();
        if (inner.Length == 0)
            return string.Concat(before.ToString(), after.ToString()); // we don't want the ins/del tag around nothing

        if (string.IsNullOrWhiteSpace(inner)) // spacing should be kept
            inner = "&nbsp;" + inner.Replace("\n", "<br />");

        return string.Concat(before.ToString(), startTag, inner, endTag, after.ToString());
    }

    // Appends a part to the list. A closing tag is paired with the nearest
    // preceding opening tag of the same type that has no partner yet; text
    // parts are never paired.
    private static void AddTagDefinition(List<TagDefinition> list, TagDefinition tag)
    {
        if (tag.Tag && !tag.OpeningTag)
        {
            int insertingIndex = list.Count;
            for (int index = list.Count - 1; index >= 0; index--)
            {
                TagDefinition candidate = list[index];
                if (candidate.Tag
                    && candidate.OpeningTag
                    && candidate.MatchingIndex == -1
                    && candidate.TagType == tag.TagType)
                {
                    tag.MatchingIndex = index;
                    candidate.MatchingIndex = insertingIndex;
                    break;
                }
            }
        }
        list.Add(tag);
    }

    private static TagDefinition CreateTextPart(string text)
    {
        TagDefinition definition = new TagDefinition();
        definition.Tag = false;
        definition.Text = text;
        return definition;
    }

    private static TagDefinition CreateTagPart(string tagText)
    {
        TagDefinition definition = new TagDefinition();
        definition.Tag = true;
        definition.OpeningTag = !IsClosingTag(tagText);
        definition.TagType = GetTagType(tagText);
        definition.Text = tagText;
        return definition;
    }

    // Extracts the lower-cased element name: the characters after '<' or '</'
    // up to the first whitespace, '/' or '>'. So "<br/>", "<BR>" and "<br />"
    // all give "br".
    private static string GetTagType(string tag)
    {
        int startIndex = tag.StartsWith("</", System.StringComparison.Ordinal) ? 2 : 1;
        int endIndex = startIndex;
        while (endIndex < tag.Length
            && !char.IsWhiteSpace(tag[endIndex])
            && tag[endIndex] != '/'
            && tag[endIndex] != '>')
        {
            endIndex++;
        }
        return tag.Substring(startIndex, endIndex - startIndex).ToLowerInvariant();
    }

    // A closing tag starts with "</"; a "</" that merely appears inside an
    // attribute value does not count.
    private static bool IsClosingTag(string tag)
    {
        return tag.StartsWith("</", System.StringComparison.Ordinal);
    }

    // One piece of the content of an ins/del element: either a run of text or
    // a single tag.
    private class TagDefinition
    {
        // true for a tag, false for a run of text
        public bool Tag { get; set; }
        // lower-cased element name; empty for text
        public string TagType { get; set; }
        // the exact text of the part
        public string Text { get; set; }
        // index of the partner tag in the parts list, or -1 when there is none
        public int MatchingIndex { get; set; }
        // true when the part is a tag that does not start with "</"
        public bool OpeningTag { get; set; }

        public TagDefinition()
        {
            this.Tag = false;
            this.Text = string.Empty;
            this.TagType = string.Empty;
            this.MatchingIndex = -1;
            this.OpeningTag = false;
        }
    }
}
}
于 2013-11-20T15:07:42.880 回答