以下方法出自微软,包含在示例“XAML 到 HTML 转换演示”的 HtmlParser 类中,可在此处下载:https://code.msdn.microsoft.com/windowsdesktop/XAML-to-HTML-Conversion-ed25a674/view/SourceCode。
有关“HTML 剪贴板格式”的更多信息,请参见:https://msdn.microsoft.com/en-us/library/aa767917(v=vs.85).aspx
/// <summary>
/// Extracts the Html document from clipboard data in the "HTML Clipboard Format" by reading
/// the StartHTML and EndHTML values from the header contained in htmlDataString.
/// </summary>
/// <remarks>
/// The header values are byte counts into the UTF-8 encoded clipboard data, not character
/// indices. The slice is therefore taken from the UTF-8 bytes of htmlDataString, which keeps
/// the result correct when the data contains non-ASCII characters.
/// </remarks>
/// <param name="htmlDataString">
/// String representing Html clipboard data. This includes Html header
/// </param>
/// <returns>
/// String containing only the Html data part of htmlDataString, without header. When the
/// header is missing, malformed or out of range, the text "ERROR: Urecognized html header"
/// is returned instead.
/// </returns>
/// <exception cref="System.ArgumentNullException">htmlDataString is null.</exception>
internal static string ExtractHtmlFromClipboardData(string htmlDataString)
{
    // The misspelling is kept on purpose: callers may compare against this exact text.
    const string unrecognizedHeader = "ERROR: Urecognized html header";

    if (htmlDataString == null)
    {
        throw new ArgumentNullException("htmlDataString");
    }

    int startHtmlIndex;
    int endHtmlIndex;
    if (!TryReadHtmlHeaderOffset(htmlDataString, "StartHTML:", out startHtmlIndex) ||
        !TryReadHtmlHeaderOffset(htmlDataString, "EndHTML:", out endHtmlIndex))
    {
        return unrecognizedHeader;
    }

    // Offsets count UTF-8 bytes, so validate and slice against the encoded form.
    byte[] bytes = System.Text.Encoding.UTF8.GetBytes(htmlDataString);
    if (startHtmlIndex < 0 || startHtmlIndex > bytes.Length || endHtmlIndex < startHtmlIndex)
    {
        return unrecognizedHeader;
    }

    // An end offset past the data is tolerated and clamped to the end of the data.
    if (endHtmlIndex > bytes.Length)
    {
        endHtmlIndex = bytes.Length;
    }

    return System.Text.Encoding.UTF8.GetString(bytes, startHtmlIndex, endHtmlIndex - startHtmlIndex);
}

/// <summary>
/// Reads the integer that follows the first occurrence of key in htmlDataString. Spaces and
/// tabs after the key are skipped, an optional sign is accepted, and the number may have any
/// count of decimal digits (it does not have to be zero-padded to ten digits).
/// </summary>
/// <param name="htmlDataString">Html clipboard data including its header</param>
/// <param name="key">Header keyword including the trailing colon, e.g. "StartHTML:"</param>
/// <param name="offset">Receives the parsed value when the method returns true</param>
/// <returns>
/// true when key was found and is followed by a value that fits into a 32-bit integer;
/// otherwise false
/// </returns>
private static bool TryReadHtmlHeaderOffset(string htmlDataString, string key, out int offset)
{
    offset = -1;

    // Header keywords are plain ASCII; an ordinal search avoids culture-dependent matching.
    int keyIndex = htmlDataString.IndexOf(key, StringComparison.Ordinal);
    if (keyIndex < 0)
    {
        return false;
    }

    int position = keyIndex + key.Length;
    while (position < htmlDataString.Length &&
           (htmlDataString[position] == ' ' || htmlDataString[position] == '\t'))
    {
        position++;
    }

    int tokenStart = position;
    if (position < htmlDataString.Length &&
        (htmlDataString[position] == '+' || htmlDataString[position] == '-'))
    {
        position++;
    }

    while (position < htmlDataString.Length &&
           htmlDataString[position] >= '0' && htmlDataString[position] <= '9')
    {
        position++;
    }

    // TryParse rejects an empty token, a lone sign and values that overflow Int32.
    return int.TryParse(
        htmlDataString.Substring(tokenStart, position - tokenStart),
        System.Globalization.NumberStyles.AllowLeadingSign,
        System.Globalization.CultureInfo.InvariantCulture,
        out offset);
}
2015-02-25 补充
以下是我的实现。我必须注意 UTF-8 编码(参见方法末尾)。
/// <summary>
/// Extracts selected Html fragment string from clipboard data by parsing the StartFragment
/// and EndFragment values from the header contained in htmlDataString
/// </summary>
/// <remarks>
/// The header values are byte counts into the UTF-8 encoded clipboard data. They are
/// validated, clamped and applied against the UTF-8 bytes of htmlDataString rather than
/// against its character length, so fragments containing multi-byte characters are not
/// truncated or rejected.
/// </remarks>
/// <param name="htmlDataString">
/// String representing Html clipboard data. This includes Html header
/// </param>
/// <returns>
/// String containing only the Html selection part of htmlDataString, without header. When
/// the header is missing, malformed or out of range, the text
/// "ERROR: Unrecognized html header" is returned instead.
/// </returns>
/// <exception cref="System.ArgumentNullException">htmlDataString is null.</exception>
internal static string ExtractHtmlFragmentFromClipboardData(string htmlDataString)
{
    // HTML Clipboard Format
    // (https://msdn.microsoft.com/en-us/library/aa767917(v=vs.85).aspx)
    // The fragment contains valid HTML representing the area the user has selected. This
    // includes the information required for basic pasting of an HTML fragment, as follows:
    // - Selected text.
    // - Opening tags and attributes of any element that has an end tag within the selected text.
    // - End tags that match the included opening tags.
    // The fragment should be preceded and followed by the HTML comments <!--StartFragment--> and
    // <!--EndFragment--> (no space allowed between the !-- and the text) to indicate where the
    // fragment starts and ends. So the start and end of the fragment are indicated by these
    // comments as well as by the StartFragment and EndFragment byte counts. Though redundant,
    // this makes it easier to find the start of the fragment (from the byte count) and mark the
    // position of the fragment directly in the HTML tree.
    const string unrecognizedHeader = "ERROR: Unrecognized html header";

    if (htmlDataString == null)
    {
        throw new ArgumentNullException("htmlDataString");
    }

    // StartFragment: byte count from the beginning of the clipboard to the start of the fragment.
    // EndFragment: byte count from the beginning of the clipboard to the end of the fragment.
    int startFragmentIndex;
    int endFragmentIndex;
    if (!TryReadFragmentHeaderOffset(htmlDataString, "StartFragment:", out startFragmentIndex) ||
        !TryReadFragmentHeaderOffset(htmlDataString, "EndFragment:", out endFragmentIndex))
    {
        return unrecognizedHeader;
    }

    // CF_HTML is entirely text format and uses the transformation format UTF-8, so the byte
    // counts must be checked against the encoded length, not the character count.
    byte[] bytes = Encoding.UTF8.GetBytes(htmlDataString);
    if (startFragmentIndex < 0 || startFragmentIndex > bytes.Length ||
        endFragmentIndex < startFragmentIndex)
    {
        return unrecognizedHeader;
    }

    // An end offset past the data is tolerated and clamped to the end of the data.
    if (endFragmentIndex > bytes.Length)
    {
        endFragmentIndex = bytes.Length;
    }

    return Encoding.UTF8.GetString(bytes, startFragmentIndex, endFragmentIndex - startFragmentIndex);
}

/// <summary>
/// Reads the integer that follows the first occurrence of key in htmlDataString. Spaces and
/// tabs after the key are skipped, an optional sign is accepted, and the number may have any
/// count of decimal digits (it does not have to be zero-padded to ten digits).
/// </summary>
/// <param name="htmlDataString">Html clipboard data including its header</param>
/// <param name="key">Header keyword including the trailing colon, e.g. "StartFragment:"</param>
/// <param name="offset">Receives the parsed value when the method returns true</param>
/// <returns>
/// true when key was found and is followed by a value that fits into a 32-bit integer;
/// otherwise false
/// </returns>
private static bool TryReadFragmentHeaderOffset(string htmlDataString, string key, out int offset)
{
    offset = -1;

    // Header keywords are plain ASCII; an ordinal search avoids culture-dependent matching.
    int keyIndex = htmlDataString.IndexOf(key, StringComparison.Ordinal);
    if (keyIndex < 0)
    {
        return false;
    }

    int position = keyIndex + key.Length;
    while (position < htmlDataString.Length &&
           (htmlDataString[position] == ' ' || htmlDataString[position] == '\t'))
    {
        position++;
    }

    int tokenStart = position;
    if (position < htmlDataString.Length &&
        (htmlDataString[position] == '+' || htmlDataString[position] == '-'))
    {
        position++;
    }

    while (position < htmlDataString.Length &&
           htmlDataString[position] >= '0' && htmlDataString[position] <= '9')
    {
        position++;
    }

    // TryParse rejects an empty token, a lone sign and values that overflow Int32.
    return int.TryParse(
        htmlDataString.Substring(tokenStart, position - tokenStart),
        System.Globalization.NumberStyles.AllowLeadingSign,
        System.Globalization.CultureInfo.InvariantCulture,
        out offset);
}